# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------
# create_new_warping_using_Landsat8_Sept2017version.py setup for "028" row imagery
# Last modification: 2017-11-22
# Description: reclassifying Landsat data from 16-bit down to 10-bit to avoid monster size principal component rasters with too many rows
# Uses method to convert Landsat files from normal to warped, compressed format to define best way to replace missing SLC gaps and small clouds
# Minimum pixel size set to 15m based on Band8 resolution, with 2**N multiples for N=0 to 5
# Pixel sizes therefore are 15, 30, 60, 120, 240, and 480 meters
# Import modules
import arcpy, os, numpy, string
from arcpy.sa import *
from arcpy import env
from datetime import date
from VAT_reader_new import VAT_reader
# Check out any necessary licenses
arcpy.CheckOutExtension("spatial")
### Load required toolboxes
arcpy.ImportToolbox(r"E:\MyPassport_original\FGD_April_8_2013\multires_invention_redo.tbx") # required for access to clip_alias

# Bookkeeping table of the subroutines in this file: index -> (name, parameter
# list as text, starting line number). Nothing else in the program reads it.
# NOTE(review): the recorded line numbers were entered by hand and are not
# kept in sync automatically - confirm before relying on them.
_subroutine_records = [
    ("main", "()", 2398),
    ("date_check", "(path,row,image_date)", 109),
    ("name_matching", "(simple,mask_dict,sat_type_local)", 227),
    ("sub_dir_naming", "()", 260),
    ("identify_subdir", "(sub_dir_listing,name2find)", 273),
    ("identify_groups", "(dir_in_special,bigD)", 283),
    ("get_targets", "(bigD,source_type)", 294),
    ("check_upper_level", "(file_design,upper_file_list_in)", 468),
    ("check_lower_level", "(calc_file_design,lower_file_list_in,defect_shortname)", 512),
    ("make_se_tables", "(bigD)", 580),
    ("cutoff_list", "(bigD)", 625),
    ("show_connections", "(some_source)", 681),
    ("use_walk", "(a_target,switch)", 691),
    ("next_stats", "(bigD)", 710),
    ("std_stats", "(calc_files,run_num_includesXX0,bigD)", 977),
    ("validate_current", "(bigD)", 1038),
    ("validate_start", "(bigD)", 1197),
    ("validate", "(bigD)", 1258),
    ("builder", "(bigD)", 1272),
    ("first_things_first", "(sys_arguments)", 1344),
    ("wrap_up", "(bigD)", 1353),
    ("normalize", "(bigD)", 1462),
    ("norm_work", "(success,currVATdict,diff_file)", 1513),
    ("export2excel", "(bigD)", 1724),
    ("do_pc2and3", "(bidD)", 1797),
    ("alt_normal", "(bigD)", 1853),
    ("alt_export2excel", "(bigD)", 1945),
    ("need2create1st", "(bigD)", 2013),
    ("canskip2real", "(bigD)", 2192),
    ("repair_targets", "(bigD,methodcomp,methodnum)", 2217),
    ("final_report", "(bigD,methodcomp,methodnum)", 2292),
]
my_subs = dict(enumerate(_subroutine_records))
##my_subs[31]=("date_check",68) # changing order of rasters to make gaps to try to solve sliver missing in Band8

### Local variables:
arcpy.env.overwriteOutput = True
arcpy.env.cellSize = 15 # the file header gives 15 m (Band8 resolution) as the minimum pixel size
please_overwrite_data=0 # 0 leaves the raster clips intact where they still exist; NOTE(review): presumably 1 requests overwriting - confirm against the code that reads this flag
bigD={} #dictionary to hold items passing between functions and the main program
### starts out with just default directory names; more work files and status flags are added as they get made
bigD["s_top"] = "F:\\LaCie2TB\\Landsat\\046x028\\" # top-level directory of the mapping source; 'get_targets' slices the path and row out of this string
bigD["s_in"] = "20130703\\" # inner source options "20170730\\ 20160812\\ 20150826\\ 20140706\\ 20130703\\ 
bigD["s_short"] = bigD["s_in"][:bigD["s_in"].find("\\")] #was = source_short; the text of s_in before its first backslash
bigD["d_top"] = "F:\\LaCie2TB\\Landsat\\046x028\\" #was = defect_topmost 046x028\\ 027x029\\ 118x041\\
bigD["d_in"] = "20131015\\" # 20170706 20170722 20170807 for 20170730
# 20170706 20170722 20170807 for 20170730 # 20150223\\ 20150615\\ 20150701\\ 20150717\\ 20150818\\ 20150919\\ 20151005\\
#"20030902\\ 20040429\\ 20040616\\ 20060724\\20080627\\ 20090529\\ 20090817\\ 20100414\\ 20100820\\ 20130422\\LE70460282013112EDC00\\
#####"20080627\\ 20090529\\ 20090817\\ 20100414\\ 20100820\\ 20110519\\ 20110823\\ 20120708\\   
#####"20120926\\LE70460292012270EDC00\\ "20130422\\LE70460292013112EDC00\\" - some Landsat7 files are nested deeper than the simpler ones
bigD["d_short"] = bigD["d_in"][:bigD["d_in"].find("\\")] #was = defect_short; the text of d_in before its first backslash
bigD["repair"]=(False,1) #(False,14)(True,14) # 'True' manually sets trigger for additional calcs not done in earlier version of program 'za' = #2, skip 'xx' as it is special
bigD["where2start"]=0 #1 #might need to be 0 to do the initial raster builds in 'xx' subdirectory and at the top level
bigD["expect_num"]=49 #47 before adding Jan. 30, 2018 fix #a finished case will have 47 GRID files in better_list (48 subdirectories including the INFO before more workfiles were added)
bigD["incomplete"]=-1 #will hold number of first subdir found with unfinished calculations
bigD["fixme"]=False #True #False # set to True to repair problem with absence of mYYMMDDtest check halfway thru each set of calculations
bigD["overwrite_data"]=False
bigD["overwrite_summaries"]=False #only ok to set to 'True' if subdirs already exist
bigD["current_fix"]=1 # ran an extra round in 'za' when started as 0
bigD["builder_has_run"]=False
bigD["jump2real"]=(False,True) # (try skipping, do real bands) allow running of real missing calcs without recreating all the intermediates - just need the \zz subdirectory
###############bigD["extras_used"]=""
##########bigD["path_row"]=("118","041")# need to consolidate use of this variable - need to make setting this variable "LIVE"
###entries added later to dictionary 'bigD' include:
###these from 'get_targets'
##bigD[(source_type,"upper_files")]=upper_file_list
##bigD[(source_type,"lower_files")]=lower_file_list
##bigD[(source_type,"better_upper")]=check_upper_level(calc_file_locals,upper_file_list) #was = better_upper_list
##bigD[(source_type,"better_lower")]=check_lower_level(calc_file_lower,lower_file_list,other_short) #was = better_lower_list
##bigD[(source_type,"image_bands")]
###these from 'make_se_tables'
##bigD["full_se_dict"]
##bigD["summary_count"]
##bigD["accumulator"]
##bigD["accume2"]
##bigD["accume3"]
###this is from 'cutoff_list'
##bigD["output_cutoffs"]
###these from 'validate_start'
##bigD["all_good"]
##bigD["subdir_good"]
##bigD["good_subdirs"]
###these from 'validate_current'
##bigD["status_current"]=True or False
##bigD["where2start"]=highest_number #changing name from highest_number to where2start
##bigD["final_dir_full"]
##bigD["this_dir"]

# Calibration table used by date_check.
# WRS path -> (rows handled, reference Landsat 5 date, reference Landsat 7 date).
# The Landsat 5 reference date is also used to recognise Landsat 8 dates.
_DATE_CHECK_AREAS = {
    27: ((28, 29, 30), date(2003, 7, 3), date(2003, 6, 25)),    # southern Minnesota
    35: ((38, 39, 40), date(2003, 6, 25), date(2003, 7, 3)),    # Sonoran Desert
    46: ((28, 29, 30), date(2003, 6, 6), date(2003, 5, 29)),    # western Oregon
    118: ((40, 41, 42), date(2003, 5, 30), date(2003, 5, 22)),  # Wenzhou China
}
_DATE_CHECK_FINAL_L5 = date(2013, 1, 6)    # last image from Landsat5 on any path
_DATE_CHECK_FIRST_L8 = date(2013, 3, 18)   # first image from Landsat8 on any path
_DATE_CHECK_CYCLE_DAYS = 16                # spacing, in days, between accepted dates


def date_check(path,row,image_date): # path and row passed as strings to be converted to integers, date passed as string YYYYMMDD
    """Classify an image date by satellite for one of the known path/row areas.

    The date is accepted when it lies a whole number of 16-day cycles away
    from the area's Landsat 5 or Landsat 7 reference date.

    Parameters:
        path, row  -- values accepted by int(), e.g. "046" and "028"
        image_date -- string "YYYYMMDD"

    Returns a tuple of two strings:
        ("L5", n)      on the Landsat 5 cycle, strictly before the final L5 date
        ("L8", n)      on the Landsat 5 cycle, strictly after the first L8 date
        ("L7pre", n)   on the Landsat 7 cycle, before the L7 reference date
                       (n is negative)
        ("L7post", n)  on the Landsat 7 cycle, on or after the L7 reference date
                       (n is zero or positive)
        ("not_okay_date", image_date)  area is known but the date fits no cycle,
                       or it fits the L5 cycle inside the L5/L8 gap
        ("not_in_area", image_date)    path/row is not in the table
    where n is the signed cycle count as a string.

    Raises ValueError when image_date, path or row cannot be converted.
    Diagnostic values are printed whenever a date in a known area fits
    neither cycle.
    """
    # Convert the date first so that a malformed date is reported for every path.
    test_date=date(int(image_date[:4]),int(image_date[4:6]),int(image_date[6:]))

    area=_DATE_CHECK_AREAS.get(int(path))
    if area is None:
        return ("not_in_area",image_date)
    rows,l5_reference,l7_reference=area
    if int(row) not in rows:
        return ("not_in_area",image_date)

    diff_l5=test_date-l5_reference
    diff_l7=test_date-l7_reference
    quotient_l5,remainder_l5=divmod(diff_l5.days,_DATE_CHECK_CYCLE_DAYS)
    quotient_l7,remainder_l7=divmod(diff_l7.days,_DATE_CHECK_CYCLE_DAYS)

    if remainder_l5==0:
        # Landsat 5 and Landsat 8 share a cycle; the calendar date tells them apart.
        if test_date<_DATE_CHECK_FINAL_L5:
            return ("L5",str(quotient_l5))
        if test_date>_DATE_CHECK_FIRST_L8:
            return ("L8",str(quotient_l5))
        return ("not_okay_date",image_date)

    if remainder_l7==0:
        # A negative cycle count means the image predates the L7 reference date.
        if quotient_l7<0:
            return ("L7pre",str(quotient_l7))
        return ("L7post",str(quotient_l7))

    # Off-cycle date: show the working so the reference dates can be checked.
    print(str(diff_l5))
    print(str(diff_l7))
    print(str(quotient_l5))
    print(str(quotient_l7))
    print(str(remainder_l5))
    print(str(remainder_l7))
    print("%s %s %s" % (path,row,image_date))
    return ("not_okay_date",image_date)
    
def name_matching(simple,mask_dict,sat_type_local): #unclear if adding Landsat8 changes needs from this function?
    """Identify which work-file design (0-8) a name belongs to.

    Parameters:
        simple         -- the name to classify
        mask_dict      -- dictionary keyed 0..8 of name templates that already
                          carry the image date; the band placeholder character
                          is replaced by a band number when matching
        sat_type_local -- "L5", any string starting with "L7", or "L8";
                          decides how many band numbers are tried (7, 9, 11)

    Returns a tuple (design number, band, compression):
        (0 or 2, "nothing_else", "no_more")  exact match of template 0 or 2
        (1 or 3, band, "no_more")            band-numbered templates
        (4..8, band, compression)            bands 1-3 with suffix 4,8,16,32,64
        (-1, "error", "nothing")             no template matched
    band and compression are strings. An unrecognised satellite type tries no
    band numbers, so anything other than templates 0 and 2 gives the
    (-1, "error", "nothing") result.
    """
    if sat_type_local=="L5":
        sat_bands=7
    elif sat_type_local[:2]=="L7":
        sat_bands=9
    elif sat_type_local=="L8":
        sat_bands=11
    else:
        sat_bands=0 # unknown satellite: no band numbers to try

    if simple==mask_dict[0]:
        return (0,"nothing_else","no_more")
    if simple==mask_dict[2]:
        return (2,"nothing_else","no_more")

    for a_band in range(1,sat_bands+1):
        band_text=str(a_band)
        if simple==mask_dict[1][:1]+band_text+mask_dict[1][2:]:
            return (1,band_text,"no_more")
        if simple==mask_dict[3][:9]+band_text:
            return (3,band_text,"no_more")
        if a_band<4: # compressed statistics rasters are only matched for bands 1 to 3
            for compress in ("4","8","16","32","64"):
                for design in (4,5,6,7,8):
                    template=mask_dict[design]
                    if simple==template[:8]+band_text+template[9:]+compress:
                        return (design,band_text,compress)
    return (-1,"error","nothing") # trap errors if file was not one of the targets - end of name_matching function

def sub_dir_naming(): # initially just 25 sub directories to test statistical properties of mapping system
    """Return the standard numbering of subdirectory names.

    The result is a dictionary keyed by run number:
        0 -> "xx\\", 1 -> "za\\", 2 -> "yb\\"
        3..25  -> first letter counts down from "w", second counts up from "c"
        26..50 -> first letter counts down from "z", second counts up from "b"
        51 -> "zz\\", 52 -> "__"
    Every value except the one for 52 ends with a backslash.
    """
    names={0:"xx\\",1:"za\\",2:"yb\\"}
    # first series: wc, vd, ue ... ay
    names.update((run,chr(122-run)+chr(96+run)+"\\") for run in range(3,26))
    # second series: zb, yc, xd ... bz
    names.update((run,chr(148-run)+chr(72+run)+"\\") for run in range(26,51))
    names[51]="zz\\"
    names[52]="__"
    return names

def identify_subdir(sub_dir_listing,name2find):
    """Look up the run number whose standard subdirectory name is name2find.

    sub_dir_listing maps run numbers to names that end in one extra
    character (the backslash), which is dropped before comparing. Only run
    numbers 0 through 51 are examined, lowest first.

    Returns (True, run number) for the first match, otherwise (False, -1).
    """
    for run_number in range(52):
        if run_number not in sub_dir_listing:
            continue
        stored_name=sub_dir_listing[run_number]
        if stored_name[:-1]==name2find: # compare without the trailing backslash
            return (True,run_number)
    return (False,-1) # name was not a valid subdirectory within the "special" subdirectory

def identify_groups(dir_in_special,bigD):
    """Test whether a directory name is the "stat" group for the current defect date.

    The expected name is "stat" followed by bigD["d_short"], with one trailing
    backslash removed from d_short if present.

    Returns (True, dir_in_special) on a match, after printing a trace line;
    otherwise (False, "not a recognized directory group").
    """
    defect_name=bigD["d_short"]
    if defect_name.endswith("\\"):
        defect_name=defect_name[:-1] #strip off the directory divider symbol
    # only the directory for the defect date being tested is recognized
    if dir_in_special!="stat"+defect_name:
        return False,"not a recognized directory group"
    print("now at line 186 %s" % (dir_in_special,))
    return True,dir_in_special
    
def get_targets(bigD,source_type): #(path2map,map_dir,source_type,other_short): # handles verifying filename idiosyncrasies    
    """Inventory one image directory and record what was found in bigD.

    Parameters:
        bigD        -- shared state dictionary; reads "d_top"/"d_in"/"s_short"
                       when source_type is "defect", or "s_top"/"s_in"/"d_short"
                       when source_type is "source" ("d_short" is also read by
                       identify_groups)
        source_type -- "defect" or "source"

    Steps, in order:
        1. take the path and row strings from fixed positions in the top-level
           directory string (re-derived from backslash positions when that
           string is longer than 28 characters)
        2. classify the first 8 characters of the inner directory name with
           date_check; an unrecognised result stops the program on purpose
           with a ZeroDivisionError
        3. fill the date into the work-file name templates
        4. list the image directory: band files go into image_bands, and the
           "special" subdirectory is walked up to three levels deep

    Entries written to bigD:
        "path_row", (source_type,"satellite"), (source_type,"upper_files"),
        (source_type,"lower_files"), (source_type,"better_upper"),
        (source_type,"better_lower"), (source_type,"image_bands")

    Returns the same bigD object. The "line NNN" numbers in the printed
    messages are fixed text and need not match current line numbers.
    """
    #path2map_source,map_source_dir,path2defect,defect_source_dir
    #create subdirectory "special" to hold calculations and further subdirectories if necessary
    #otherwise check for which calculations have already been done
    ####if source_type=="source":
    print "source_type is ",source_type
    if source_type=="defect":
        path2map=bigD["d_top"]
        map_dir=bigD["d_in"]
        other_short=bigD["s_short"]
    elif source_type=="source":
        path2map=bigD["s_top"]
        map_dir=bigD["s_in"]
        other_short=bigD["d_short"]
    else:
        # NOTE(review): nothing is assigned here, so the next use of path2map raises NameError
        print "mistake to be here at line 205"
    print "line 206 in 'get_targets'"
    # default: the last 8 characters of the path are "PPPxRRR\"
    (path_str,row_str)=path2map[-8:-5],path2map[-4:-1]
    print "line 310 path_str, row_str= ",path_str,row_str
    if len(path2map)>28:
        # record the position of every backslash; the final entry is the -1 returned when no more are found
        bs={0:0}
        bs_spot=0
        bs_num=0
        while bs_spot>=0:
            bs_spot=path2map.find("\\",bs_spot+1)
            bs_num+=1 #1,2,3,4,5,6 may be the 1 more than really exists count
            bs[bs_num]=bs_spot #{0:0,1:2,2:11,3:19,4:27,5:41,6:-1}
        print str(bs_num)
        print bs
        # 8 positions between the third-last and second-last backslash means a 7 character "PPPxRRR" name sits there
        if bs[bs_num-2]-bs[bs_num-3]==8:
            (path_str,row_str)=path2map[bs[bs_num-3]+1:bs[bs_num-3]+4],path2map[bs[bs_num-3]+5:bs[bs_num-3]+8]
            print "line 322 bs_num,path_str, row_str= ",str(bs_num),path_str,row_str
        else:
            # the values sliced from the end of the string are kept in this case
            print "errors at line 324, path_str, row_str = ",path_str,row_str
            print "for casea of path2map = ",path2map
    #(path_str,row_str)=path2map[-8:-5],path2map[-4:-1]
    bigD["path_row"]=(path_str,row_str)
    print bigD["path_row"]
    ##bigD["d_top"] = "F:\\LaCie2TB\\Landsat\\118x041\\" #was = defect_topmost 046x028\\ 027x029\\
    sub_dir_list=sub_dir_naming() #return dictionary of standard subdirectory naming conventions
    series = {0:"L5",1:"L7pre",2:"L7post",3:"L8"} # pre or post to Landsat 7 Scan Line Correction Failure on May 31, 2003
    # name templates: YYMMDD / YYYYMMDD are replaced below with the image date
    calc_file_base = {0:"mYYMMDDtest",1:"bNzYYYYMMDD",2:"pcYYMMDD",3:"pcYYMMDDcB",4:"pYYMMDDcBmn",5:"pYYMMDDcBsd",6:"pYYMMDDcBmi",\
                            7:"pYYMMDDcBsi",8:"pYYMMDDcBse"}
    calc_file_deeper = {9:"selR_cBout",10:"usalRYYMMDDcB",11:"alRYYMMDDok",12:"edgeYYMMDDalR",13:"nYYMMDDQyymmdd",\
                        14:"xBYYMMDDyymmdd",15:"QfixpcBx",16:"origpcBQ",17:"Qdifferx",18:"QfixpcBge4all",\
                        19:"pcBcutmi",20:"pcBcutmn",21:"Qdifferall",22:"Qmethodnum",23:"Qmethodcomp",\
                        24:"QpixpcBge4all",25:"Qmisserall1"} # found only in deeper subdirectories
    upper_file_list = {}
    lower_file_list = {}
    calc_file_lower = {}    
    for index in range(9,26): #now up to 26 types of workfiles
        calc_file_lower[index]=calc_file_deeper[index]
    possible_date=map_dir[:8]
    sat_type,sat_status=date_check(path_str,row_str,possible_date)# need to consolidate path/row nomenclature
    if sat_type=="L5":
        series_num=0
    elif sat_type=="L8":
        series_num=3
    elif sat_type=="L7pre":
        series_num=1
    elif sat_type=="L7post":
        series_num=2
    else:
        print "apparent image date is not L5, L7, or L8 for this location"
        print sat_type,sat_status,possible_date
        halt_now = 1/0 # error handling during program writing is to make error fatal
    bigD[source_type,"satellite"]=(sat_type,sat_status,series_num)
    print "\n",series[series_num],"for apparent image date = ",possible_date # *** series num or sat_type is a RETURN item***
    calc_file_lower[10]=calc_file_deeper[10][:5]+possible_date[2:]+calc_file_deeper[10][11:] # replace with real map source date
    calc_file_lower[11]=calc_file_deeper[11][:3]+possible_date[2:]+calc_file_deeper[11][9:]  # replace with real map source date
    calc_file_lower[12]=calc_file_deeper[12][:4]+possible_date[2:]+calc_file_deeper[12][10:]
    calc_file_lower[13]=calc_file_deeper[13][:1]+possible_date[2:]+calc_file_deeper[13][7:]
    calc_file_lower[14]=calc_file_deeper[14][:2]+possible_date[2:]+calc_file_deeper[14][8:] # still need Q,R replacement
    
    calc_file_locals = {} # match names to the 6 or 8 character image date
    calc_file_locals[1]=calc_file_base[1][:3]+possible_date[:]
    for cases in (0,4,5,6,7,8):
        calc_file_locals[cases]=calc_file_base[cases][:1]+possible_date[2:]+calc_file_base[cases][7:]
    for cases in (2,3):
        calc_file_locals[cases]=calc_file_base[cases][:2]+possible_date[2:]+calc_file_base[cases][8:]
    # for series 3 (L8) template 2 becomes "pd"+date instead of "pc"+date
    if series_num==3: calc_file_locals[2]=calc_file_base[2][:1]+"d"+possible_date[2:]+calc_file_base[2][8:] 
    #print calc_file_locals # turned off Nov. 15, 2017   
    image_bands = {}
    special_exists=False # directory "special" could be simply mapping source, or also statistical test case with alternate defects
    ##extras_present=False
    a_list=os.listdir(path2map+map_dir)
##    if ("extras" in a_list) and source_type=="defect":
##        a_list=os.listdir(path2map+map_dir+"extras\\") # need to dig one layer deeper for mixed level 1 and 2 rasters
##        extras_present=True
##        bigD["extras_used"]="extras\\"
##        extras="extras\\"
##        print "extras\\ subdirectory recognized at line 381"
##    else:
##        extras=""
##        print a_list
##        print source_type
##        ##haltnow=1/0
    print "This is file and subdirectory list for ",path2map+map_dir   #+extras
    for anything in a_list:
        good2go=False # only entries that set this to True are stored in image_bands
        ##if extras_present:
        if anything[-4:]==".TIF": # may need to handle other types of files
            if anything[-6:-5]=="B":
                # name ends in "B" plus one character before ".TIF"
                index=anything[-6:-4]
                good2go=True
                ##image_bands[index]=anything
            elif anything[-10:-6]=="VCID":
                # names ending "VCID_1.TIF" / "VCID_2.TIF" are stored as B6 and B9
                if anything[-5:-4]=="1":
                    index="B6"
                    good2go=True
                elif anything[-5:-4]=="2":
                    index="B9"
                    good2go=True
                else:
                    print "stopping at line 400"
                    haltnow=1/0
                ##image_bands[index]=anything
            elif anything[-7:-6]=="B" and not anything[-7:-4]=="BQA" and not anything[-6:-5]=="6":
                # NOTE(review): good2go is not set here, so names ending in "B" plus two characters
                # are never stored in image_bands - confirm whether that is intended
                index=anything[-7:-4]
                
##            else:
##                pass
##                elif anything[-7:-6]=="B" and not anything[-7:-4]=="BQA":
##                    index=anything[-7:-4]
##                image_bands[index]=anything # putting TIF files into a dictionary to return
        ##else: #not dealing with 'extras\' deeper subdirectory
            ##if anything[-4:]==".TIF": # may need to handle other types of files
##                if anything[-6:-5]=="B":
##                    index=anything[-6:-4]
##                elif anything[-7:-6]=="B" and not anything[-7:-4]=="BQA":
##                    if anything[-5:-4]=="0": 
##                        index=anything[-7:-5]
##                    elif anything[-5:-4]=="1" or anything[-5:-4]=="2": #handles B61 and B62 names differently than 3 lines down
##                        index=anything[-7:-4]
##                elif anything[-13:-12]=="B":
##                    index=anything[-13:-4]
##                else:
##                    index=anything[:-4]
##                image_bands[index]=anything # putting TIF files into a dictionary to return
        elif anything[-10:-8]=="wc": #handling GRID files renamed to start with "wc" - code added Feb. 13, 2018
            if anything[-2:-1]=="b" or anything[-2:-1]=="B":
                index="B"+anything[-1:]
                good2go=True
##            elif anything[-7:-6]=="B":
##                if anything[-5:-4]=="0": 
##                    index=anything[-7:-5]
##                elif anything[-5:-4]=="1" or anything[-5:-4]=="2": #handles B61 and B62 names differently than 3 lines down
##                    index=anything[-7:-4]
##            elif anything[-13:-12]=="B":
##                index=anything[-13:-4]
        else:
            index=anything[:] # not used further: good2go is still False on this branch
        if good2go:
            image_bands[index]=anything # putting TIF files into a dictionary to return
            print "line 440",index,anything
        if anything=="special":
            special_exists=True
            print "subdirectory named 'special' already exists with the following contents"
            next_list=os.listdir(path2map+map_dir+anything)
            for something in next_list:
                if os.path.isdir(path2map+map_dir+anything+"\\"+something):
                    #print something # TIF files use value of 0 for missing data and sometimes also for real data
                    dig_deeper,group_dir=identify_groups(something,bigD) #test for whether this subdir name is standard for holding deeper levels
                    calc_file_num,further_info,more=name_matching(something,calc_file_locals,sat_type) # ??????????????????????????
                    #print str(calc_file_num),further_info,more # - no need to print out once it is working OK
                    # directories that match no template all share the key (-1,"error","nothing"), so only the last one is kept
                    upper_file_list[(calc_file_num,further_info,more)]=something
                    #dig_deeper,group_dir=identify_groups(something,bigD)
                    if dig_deeper:
                        print "at line 289 for ",something,dig_deeper,group_dir
                        deeper_list=os.listdir(path2map+map_dir+anything+"\\"+something) # make a list of subdirs inside of statYYYYMMDD
                        for a_deeper_case in deeper_list:
                            subdirfound,sub_number=identify_subdir(sub_dir_list,a_deeper_case) #possibly error source
                            if subdirfound:
                                lowest_count=0 # counts the directories found inside this run subdirectory
                                #print "stat analysis subdir exists named: ",something,"with deeper subdir = ",a_deeper_case
                                upper_file_list[("statistics",group_dir,sub_number)]=a_deeper_case
                                lower_list=os.listdir(path2map+map_dir+anything+"\\"+something+"\\"+a_deeper_case)
                                for lower_things in lower_list:
                                    if os.path.isdir(path2map+map_dir+anything+"\\"+something+"\\"+a_deeper_case+"\\"+lower_things):
                                        lowest_count+=1
                                        #print "file within here = ",lower_things # turned off Nov. 15, 2017
                                        lower_file_list[(sub_number,lowest_count)]=(something,a_deeper_case,lower_things)
                                        if a_deeper_case[:2]=="zz": print something,a_deeper_case,lower_things,str(sub_number),str(lowest_count)
            print
        elif os.path.isdir(path2map+map_dir+anything):
            # any other subdirectory is only announced; its contents are listed but not recorded
            print "subdirectory exists named: ",anything
            next_list=os.listdir(path2map+map_dir+anything)
            for something in next_list:
                if os.path.isdir(path2map+map_dir+anything+"\\"+something):
                    pass
                    #print something # TIF files use value of 0 for missing data and sometimes also for real data
    #print image_bands
    #print upper_file_list # no more need to print out 
    print
    band_keys=image_bands.keys()
    band_keys.sort()
    up_list_keys=upper_file_list.keys()
    up_list_keys.sort()
    print "this is the image_band_list from line 484 in get_targets"
    for a_key in band_keys: #list of original mapping source Landsat bands without defects
        print a_key,image_bands[a_key]
    print
##    print "this is the upper_file_list from line 323 in get_targets"
##    for a_key in up_list_keys: # list of N by N compression files used to create mapping
##        print a_key,upper_file_list[a_key]
    print #also need to identify various possible conversion files
    #print lower_file_list
    bigD[(source_type,"upper_files")]=upper_file_list
    bigD[(source_type,"lower_files")]=lower_file_list
    bigD[(source_type,"better_upper")]=check_upper_level(calc_file_locals,upper_file_list) #was = better_upper_list
    bigD[(source_type,"better_lower")]=check_lower_level(calc_file_lower,lower_file_list,other_short) #was = better_lower_list
    bigD[(source_type,"image_bands")]=image_bands
##    if source_type=="defect":
##        haltnow=1/0
    #stat_dir,deep_dir,better_lower_list=check_lower_level(calc_file_lower,lower_file_list)
    #return (image_bands,better_upper_list,better_lower_list) # end of get_targets function
    return bigD # end of 'get_targets' function

def check_upper_level(file_design,upper_file_list_in):
    """Re-key the top-level work directories by (design, band, compression).

    Parameters:
        file_design        -- dictionary keyed 0..8 of name templates with the
                              image date already filled in, e.g.
                              {0:"mYYMMDDtest",1:"bNzYYYYMMDD",2:"pcYYMMDD",
                               3:"pcYYMMDDcB",4:"pYYMMDDcBmn",...,8:"pYYMMDDcBse"}
        upper_file_list_in -- dictionary whose keys are 3-tuples and whose
                              values are directory names

    Returns a new dictionary:
        * entries whose key starts with "statistics" are copied unchanged
        * every other directory name is compared with all names that the
          templates can generate; each hit is stored under
          (design number, band number as int, compression level as string)
        * names that match no template are left out
    Designs 0-3 have no compression suffix; their keys carry the first
    compression level, "4", as the third element.

    Prints the number of entries captured.
    """
    compress_levels={6:"1",1:"4",2:"8",3:"16",4:"32",5:"64"}
    band_sizes={0:1,1:8,2:1,3:8,4:3,5:3,6:3,7:3,8:3} # only need 7 bands for Landsat5 and 8 for Landsat7
    compress_sizes={0:1,1:1,2:1,3:1,4:5,5:5,6:5,7:5,8:5}
    slice_band_start={0:11,1:1,2:8,3:9,4:8,5:8,6:8,7:8,8:8}
    slice_band_end={0:11,1:2,2:8,3:10,4:9,5:9,6:9,7:9,8:9}

    def build_candidates():
        # Generate every name the templates allow, once: name -> list of (key, name).
        found={}
        for design in range(0,9):
            band_count=band_sizes[design]
            level_count=compress_sizes[design] #special case of 1 has no added compression levels
            template=file_design[design]
            head=template[:slice_band_start[design]]
            tail=template[slice_band_end[design]:]
            for band in range(1,band_count+1):
                for level in range(1,level_count+1):
                    if level_count==1 and band_count>1:
                        name=head+str(band)+tail # cases 1,3
                    elif level_count==1:
                        name=template # nothing to add for cases 0,2
                    elif band_count==1:
                        name=template+compress_levels[level] # handles no cases
                    else: #must have both band numbers and compression levels, cases 4,5,6,7,8
                        name=head+str(band)+tail+compress_levels[level]
                    key=(design,band,compress_levels[level])
                    found.setdefault(name,[]).append((key,name))
        return found

    checked_list={}
    actions=0
    candidates=None # built on first need so the templates are untouched when only "statistics" entries exist
    for a_key in upper_file_list_in: # the result does not depend on the order of the keys
        (designs,bands,compressions)=a_key
        a_name=upper_file_list_in[a_key] # filename is value held in this dictionary
        if designs=="statistics":
            checked_list[(designs,bands,compressions)]=a_name
            actions+=1
            continue
        if candidates is None:
            candidates=build_candidates()
        for key,name in candidates.get(a_name,()):
            checked_list[key]=name
            actions+=1
    print("times through to capture of top level = %s" % (str(actions),))
    return checked_list

def check_lower_level(calc_file_design,lower_file_list_in,defect_shortname):
    ###{9:"selR_cBout",10:"usalRYYMMDDcB",11:"alRYYMMDDok",12:"edgeYYMMDDalR",13:"nYYMMDDQymmdd",\ #design 14 is only necessary at XX level
    ### 14:"xBYYMMDDyymmdd",15:"QfixpcBx",16:"origpcBQ",17:"Qdifferx,18:"QfixpcBge4all",19:"pcBcutmi",20:"pcBcutmn",21:"Qdifferall",\
    ##22:"Qmethodnum",23:"Qmethodcomp"}
    ###YYMMDD is filled in before calling this function
    output_dir={}
    copy_list={}
    actions=0
    even_shorter=defect_shortname[5:]+defect_shortname[2:4]
    calc_file_design[13]=calc_file_design[13][:8]+even_shorter #"42213" #need to make this subdir link live
    calc_file_design[14]=calc_file_design[14][:8]+even_shorter #"42213" #need to make this subdir link live
    compress_levels={6:"1",1:"4",2:"8",3:"16",4:"32",5:"64"}
    file_types={9:"BZ",10:"B",11:"Z",12:1,13:1,14:"B",15:"BZ",16:"BZ",17:"Z",18:"B",19:"BZ",20:"BZ",21:1,22:1,23:1,24:"B",25:1}
    band_sizes={9:3,10:3,11:1,12:1,13:1,14:3,15:3,16:3,17:1,18:3,19:3,20:3,21:1,22:1,23:1,24:3,25:1}
    compress_sizes={9:5,10:1,11:5,12:1,13:1,14:1,15:6,16:6,17:6,18:1,19:5,20:5,21:1,22:1,23:1,24:1,25:1}
    slice_band_start={9:6,10:12,11:11,12:13,13:13,14:1,15:6,16:6,17:8,18:6,19:2,20:2,21:10,22:10,23:11,24:6,25:11}
    slice_band_end={9:7,10:13,11:11,12:13,13:13,14:2,15:7,16:7,17:8,18:7,19:3,20:3,21:10,22:10,23:11,24:7,25:11}
    slice_compress_start={9:10,10:99,11:11,12:99,13:99,14:99,15:8,16:8,17:8,18:99,19:99,20:99,21:99,22:99,23:99,24:99,25:99}
    std_subdir_list=sub_dir_naming() # only needs to be called once - might eventually move to a global variable section - len = 52
    list_in_key=lower_file_list_in.keys()
    list_in_key.sort()
##    for a_key in list_in_key:
##        copy_list[a_key]=lower_file_list_in[a_key]
    #special case for design=21, subdir='zz' number 51
    for a_key in list_in_key: #this gets just a single entry in the very large dictionary fed into the function
        (stat_dir,deep_case,list_in_files)=lower_file_list_in[a_key]
        if list_in_files=="differgal": print stat_dir,deep_case,list_in_files,a_key
        for a_std_num in range(len(std_subdir_list)): #dictionary of number:"QR"
            a_std_name =std_subdir_list[a_std_num] #QR possible oops, this includes the trailing \\
            for designs in range(9,26): #need to set a global variable for here and in 'get_targets'
                band_size=band_sizes[designs]
                compression=compress_sizes[designs] #special case of 1 has no added compression levels
                test_name=calc_file_design[designs]
                for a_band_size in range(1,band_size+1):
                    for compressing in range(1,compression+1):
                        if compression==1 and band_size>1:
                            this_name=test_name[:slice_band_start[designs]]+str(a_band_size)+test_name[slice_band_end[designs]:] # handles 10,14,21
                        elif compression==1 and band_size==1:
                            this_name=test_name # nothing to add for cases 12,13
                        elif compression>1 and band_size==1:
                            this_name=test_name+compress_levels[compressing]# handles cases 11,16
                        else: #must have both band numbers and compression levels, cases 9,15,17
                            this_name=test_name[:slice_band_start[designs]]+str(a_band_size)+test_name[slice_band_end[designs]:]+\
                                       compress_levels[compressing]
                        this_nameQ=this_name.replace("Q",a_std_name[:1])
                        this_nameQR=this_nameQ.replace("R",a_std_name[1:2])
                        #print this_nameQR
                        a_name=list_in_files
                        ###if designs==18: print str(designs)+"design, name is: "+this_nameQR
                        if this_nameQR==a_name and a_std_name[:-1]==deep_case:
                            output_dir[(stat_dir,deep_case,designs,a_band_size,compress_levels[compressing])]=this_nameQR
                            actions+=1
                        elif deep_case=="zz" and designs==21 and a_name=="differgall":
                            output_dir[(stat_dir,deep_case,designs,a_band_size,compress_levels[compressing])]="differgall"
                            actions+=1
                            #print stat_dir,deep_case,str(designs),str(a_band_size),compress_levels[compressing]," = differgall"
                        elif deep_case=="zz" and designs==25 and a_name=="missergall":
                            output_dir[(stat_dir,deep_case,designs,a_band_size,compress_levels[compressing])]="missergall"
                            actions+=1    
                        else:
                            pass # could put a break in the if clause just above here to speed things up?
                            #print this_nameQR," vs ",a_name,"      ",
        #print output_dir
        #print this_nameQR
        #print a_name
    print "times through to new capture =",str(actions)
    return output_dir # end of check_lower_level
        
def make_se_tables(bigD):
    #operates on datafile type 8 - pYYMMDDcBse      top_dir,inner_dir,upper_file_list
    print "\nNow at line 615 in 'make_se_tables'"
    top_dir=bigD["s_top"]
    inner_dir=bigD["s_in"]
    upper_file_list=bigD[("source","upper_files")]
    full_se_dict={}
    summary_count={}
    accumulator={}
    accume2={}
    accume3={}
    for pc_bands in range(1,4):
        for compress in (4,8,16,32,64):
            currVATdict_in={}
            currVATdict_out={}
            se_file=top_dir+inner_dir+"special\\"+upper_file_list[(8,str(pc_bands),str(compress))]
            success,currVATdict_out=VAT_reader(currVATdict_in,se_file,1,"csv",1) # only passing two parameters, using default for remainder
            this_sum=0
            entry_count=0
            if success:
                summary_count[(pc_bands,compress)]=0
                se_file_key=currVATdict_out.keys()
                se_file_key.sort()
                for a_key in se_file_key:
                    load_key=(pc_bands,compress,int(a_key)) # possible values from GRID table in range 0 to ~100
                    full_se_dict[load_key]=currVATdict_out[a_key]
                    this_sum+=currVATdict_out[a_key]
                    entry_count+=1
                    accumulator[(pc_bands,compress,entry_count)]=this_sum # sorting of se_file_key allows direct in order running partial totals
                    accume2[("se_value",pc_bands,compress,entry_count)]=int(a_key) # need to capture actual values because gaps are possible
            else:
                print "failed to read VAT file"
                haltnow=1/0
            summary_count[(pc_bands,compress)]=this_sum # accumulate totals on the fly while the full_se_dict is being loaded
            summary_count[("case_count",pc_bands,compress)]=entry_count # special case "case_count" holds number of rows in VAT
            for a_count in range(1,entry_count+1):
                accume3[(pc_bands,compress,a_count)]=float(accumulator[(pc_bands,compress,a_count)])/float(summary_count[(pc_bands,compress)])
    bigD["full_se_dict"]=full_se_dict
    bigD["summary_count"]=summary_count
    bigD["accumulator"]=accumulator
    bigD["accume2"]=accume2
    bigD["accume3"]=accume3
    #return full_se_dict,summary_count,accumulator,accume2,accume3 # end of make_se_tables            
    return bigD # end of make_se_tables

def cutoff_list(bigD):
    """Build the table of SE cutoff row positions for each PC band, compression level and round.

    Reads bigD["summary_count"] and bigD["accume3"] (both written by make_se_tables) and stores
    the result in bigD["output_cutoffs"], a dictionary with two kinds of keys:
        (pc_bands,compress,round)                  -> row position used as the cutoff
        ("percent_under",pc_bands,compress,round)  -> accume3 value looked up for that round
    Round 0 for compression 16 is the last row whose cumulative fraction is under 0.22 (0 when
    even the first row is not under it).  The other compressions start at fixed offsets from that
    row (never below 0).  Rounds 1 to 49 each move one row further on, except that 32 and 64 stay
    at 0 while the next lower compression was still at 0 in the previous round.

    A "percent_under" entry is stored only when the row exists in accume3; otherwise a message
    is printed and processing continues.  This now also applies to the round 0 lookups for
    compressions 4, 8, 32 and 64, which previously raised KeyError for a missing row.

    Returns bigD.
    """
    #(pc_bands,compress,sequential_count_of_rows_in_VAT) - rows are not always equal to pixel values because some do not exist near 100 SE
    summary_count_dictionary=bigD["summary_count"]
    accume3_dictionary=bigD["accume3"]
    print("\nthis is a partial list of statistical SE cutoff values to be used")
    output_cutoffs={}
    compress_links={4:2,8:1,16:0,32:-1,64:-2}

    def note_percent_under(pc_bands,compress,round_num,row,complaint):
        # store the cumulative fraction for this round, or report the row that is missing
        lookup=(pc_bands,compress,row)
        if lookup in accume3_dictionary: # direct dictionary test, no temporary key list
            output_cutoffs[("percent_under",pc_bands,compress,round_num)]=accume3_dictionary[lookup]
        else:
            print(" ".join([complaint,str(pc_bands),str(compress),str(row)]))

    for pc_bands in range(1,4):
        #step 1 is to find the row nearest to, while still under, 22% of data for 16 by 16 compression - others get set relative to this
        cases=summary_count_dictionary[("case_count",pc_bands,16)]
        finding_case=0 #even the first row might have more than 22% of data
        for a_case in range(1,cases+1):
            if accume3_dictionary[(pc_bands,16,a_case)]<0.22:
                finding_case=a_case
        output_cutoffs[(pc_bands,16,0)]=finding_case #start position for each pc_band
        # NOTE(review): this lookup reads the row itself while every other lookup reads row+1 - confirm which is intended
        note_percent_under(pc_bands,16,0,finding_case,"error at line 515 for attempted key= ")
        for compress in (4,8,32,64):
            adjust_case=finding_case+compress_links[compress]
            if adjust_case<0: adjust_case=0
            output_cutoffs[(pc_bands,compress,0)]=adjust_case #start position for each pc_band
            note_percent_under(pc_bands,compress,0,adjust_case+1,"error in cutoff_list round 0 for attempted key= ")
        for many_rounds in range(1,50): #together with round 0 this gives 50 cutoff lists (0 to 49 indexed) hopefully more than is needed
            for compress in (4,8,16):
                next_value=1+output_cutoffs[(pc_bands,compress,many_rounds-1)]
                output_cutoffs[(pc_bands,compress,many_rounds)]=next_value
                note_percent_under(pc_bands,compress,many_rounds,next_value+1,"error at line 529 for attempted key = ")
            for compress in (32,64):
                if output_cutoffs[(pc_bands,compress//2,many_rounds-1)]==0:
                    next_value=0 #leave it at 0 if the next lower compress is still at 0
                else:
                    next_value=1+output_cutoffs[(pc_bands,compress,many_rounds-1)]
                output_cutoffs[(pc_bands,compress,many_rounds)]=next_value
                note_percent_under(pc_bands,compress,many_rounds,next_value+1,"error at line 536 for attempted key= ")
    for row in range(1): #1 instead of 50 to reduce screen output
        for compress in (4,8,16,32,64):
            pieces=[]
            for pc_bands in (1,2,3):
                index=(pc_bands,compress,row)
                pieces.append(" ".join([str(index),"=",str(output_cutoffs[index])]))
            print("     ".join(pieces)) # same spacing as the original comma separated print
    bigD["output_cutoffs"]=output_cutoffs
    return bigD # end of cutoff_list function

def show_connections(some_source):
    """Debugging routine: announce itself, print how many entries some_source holds, return True.

    The keys are still sorted and walked, but the per-entry print is switched off.
    """
    print("show_connections function on line 542 **** Displaying functioning links of expected versus truly present files")
    link_names=some_source.keys()
    print(str(len(some_source)))
    for link_name in sorted(link_names):
        pass ## per-entry printing is turned off for now
        ##print link_name," => ",some_source[link_name]
    return True #end of show_connections - a debugging routine

def use_walk(a_target,switch):
    """Print the directory tree found below a_target when switch is true.

    a_target -- directory to explore; one trailing backslash is removed first
    switch   -- nothing is printed when this is false
    Each directory is printed with its subdirectories and files.  The file listing of a
    directory stops at the first 'dblbnd.adf' (reported as "GRID raster") or 'arc.dir'
    (reported as "ARC INFO").  Returns None.
    """
    if a_target[-1:]=="\\":
        a_target=a_target[:-1] # clean up any unneeded \ at end of input directory
    tree=os.walk(a_target)
    if not switch: # only print out the tree if desired
        return
    print("Exploring tree for: "+a_target+"\n")
    for folder,subfolders,plain_files in tree:
        print(folder+"\\")
        for subfolder in subfolders:
            print("\t\\"+subfolder+"\\")
        for file_name in plain_files:
            if file_name=="dblbnd.adf":
                print("GRID raster")
                break
            if file_name=="arc.dir":
                print("ARC INFO")
                break
            print("\t\t"+file_name)
    print("")
    #end of use_walk function

def next_stats(bigD):
    """Build, or reuse, the rasters of the next statistics subdirectory and record their names.

    Inputs are read from bigD: "all_good", "subdir_good", "status_current", "where2start",
    "final_dir_full", "s_top", "s_in", ("source","upper_files"), "this_dir", "d_top", "d_in",
    "d_short", ("defect","image_bands"), "output_cutoffs" and ("source","better_upper").

    The subdirectory worked on is subdir_list[run_num-1], where run_num is where2start+1 when
    final_dir_full is true and where2start otherwise.  It is created with os.mkdir only when
    final_dir_full is true, its name is not "__" and it does not exist yet.

    run_num<52  : builds the 'sel', 'usal', 'al', 'edge', 'n', 'fix', 'orig', 'differ',
                  'differall', 'fixpc1ge4all', 'methodnum', 'methodcomp', 'pixpc1ge4all' and
                  'misserall1' rasters, then calls std_stats.  Apart from the 'p'/'q' pool
                  rasters written during the 'za' pass, a raster is saved only when its output
                  path does not exist yet (overwrite_data is hard coded to False).
    run_num==52 : only records the 'missergall' name and sets bigD["where2start"] to 51.
    run_num>52  : nothing is calculated.

    Returns bigD with bigD["calc_files"] holding the raster path names built here, or False
    when bigD["all_good"] is false.
    """
##    def next_stats(stat_start,subdir_good,stat_current,what_is_next,final_dir_finished,source_top,source_in,upper_files,this_dir,\
##                   defect_top,defect_in,defect_short,defect_bands,cutoff_list):
    #ready2go=next_stats(status_start,subdir_good,status_current,where2start,final_dir_finished,source_topmost,source_inner,source_upper_files,\
#                    this_dir,defect_topmost,defect_inner,defect_short,defect_band_list,full_cutoff_list)
    stat_start=bigD["all_good"]
    subdir_good=bigD["subdir_good"]
    stat_current=bigD["status_current"]
    what_is_next=bigD["where2start"]
    final_dir_finished=bigD["final_dir_full"]
    source_top=bigD["s_top"]
    source_in=bigD["s_in"]
    upper_files=bigD[("source","upper_files")]
    this_dir=bigD["this_dir"]
    defect_top=bigD["d_top"]
    defect_in=bigD["d_in"]
    defect_short=bigD["d_short"]
    defect_bands=bigD[("defect","image_bands")]
    cutoff_list=bigD["output_cutoffs"] # this was the wrong piece of the related dictionaries, fixed now
    #fixme=bigD["fixme"]
    if not stat_start: print "need to do the original SE calculations"
    if not stat_current: print "something is wrong with previous runs or they haven't happened yet"
    #controlling for status start, status_current will need to be done in a different routine before this one is called
    if not stat_start:
        print "bailing out at line 594 in next_stats function because the upper level files are not 100% OK"
        return False
    #okay to run
    #dictionary uses index 0 for 'za', 1 for 'yb', 2 for 'wc', 3 for 'vd', 4 for 'ue', 5 for 'tf', etc. *** 1 off from index in cutoff_list
    if this_dir=="": this_dir="stat"+defect_short #might eventually not be needed
    print "\nfunction 'next_stats' running at line 599"
    print "stat_start=",stat_start
    print "subdir_good=",subdir_good
    print "stat_current=",stat_current
    print "what_is_next=",str(what_is_next)
    print "final_dir_finished=",final_dir_finished
    print "this_dir=",this_dir
    overwrite_data=False #temporary version just hard coding value inside of function
    calc_files={}
    cutoffs={}
    subdir_list=sub_dir_naming() #dictionary format is {0:"xx\\",1:"za\\",2:"yb\\",...} #wc, vd, ue etc.
    if final_dir_finished: #need to generate start of new directory
        run_num=what_is_next+1
    else:
        run_num=what_is_next
    ##print str(run_num)
    new_subdir=subdir_list[run_num-1]
    print str(run_num)
    print new_subdir
    long_new_subdir=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir
    #haltnow=1/0 #debugging break
    if final_dir_finished and new_subdir!="__": #catch the case of attempting to build subdir no. 52 which would be '__'
        if not os.path.exists(long_new_subdir):
            print "\nCreating new subdirectory "+new_subdir+" which is indexed as "+str(run_num-1)
            os.mkdir(long_new_subdir)
        else: print "subdir ",new_subdir," already exists, no need to create it. Will look for unfinished summary calcs."
    else:
        print "\nUsing preexisting, partially filled subdirectory "+new_subdir+" which is indexed as "+str(run_num-1)
    if run_num<52:
        # cutoff rounds are indexed from 0 while run_num for the first real subdirectory 'za' is 2, hence run_num-2
        for bands in range(1,4):
            for compress in (4,8,16,32,64):
                cutoffs[(bands,compress)]=cutoff_list[(bands,compress,run_num-2)]
                #print str(bands)+" "+str(compress)+" has cutoff = "+str(cutoff_list[(bands,compress,run_num-2)]) # indices out of phase by 1
        for bands in range(1,4):
            for compress in (4,8,16,32,64):
                index=(8,str(bands),str(compress))
                se_file_name=source_top+source_in+"special\\"+upper_files[index]
                #print this_dir
                rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"sel"+new_subdir[1:2]+"_c"+str(bands)+"out"+str(compress) #similar names present in source_lower for previous subdirs
                #print rast_calc_name
                calc_files[("sel",bands,compress)]=rast_calc_name
                if not os.path.exists(rast_calc_name) or overwrite_data:
                    print rast_calc_name
                    # 'sel' raster: the compression value where the SE raster is at or under the cutoff, 0 elsewhere
                    raster_algebra = Con(Raster(se_file_name) <= cutoff_list[(bands,compress,run_num-2)], compress, 0)
                    raster_algebra.save(rast_calc_name)
            rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"usal"+new_subdir[1:2]+source_in[2:-1]+"c"+str(bands)
            calc_files[("usal",bands)]=rast_calc_name
            if not os.path.exists(rast_calc_name) or overwrite_data:
                # 'usal' raster: largest compression whose 'sel' raster is above 0 for this band, 1 when none is
                raster_algebra = Con(Raster(calc_files[("sel",bands,64)]) > 0, 64, Con(Raster(calc_files[("sel",bands,32)]) > 0, 32, \
                                Con(Raster(calc_files[("sel",bands,16)]) > 0, 16, Con(Raster(calc_files[("sel",bands,8)]) > 0, 8, \
                                Con(Raster(calc_files[("sel",bands,4)]) > 0, 4, 1)))))
                raster_algebra.save(rast_calc_name)
        # the order 64 down to 4 matters: each level below 64 reads the 'al' name stored by the previous pass
        for compress in (64,32,16,8,4):
            #create the al* series with 64 very special and 4 slightly different
            if compress==64:
                raster_algebra = Con(Raster(calc_files[("usal",1)]) == 64,Con(Raster(calc_files[("usal",2)]) == 64,Con(Raster(calc_files[("usal",3)])\
                                == 64,64,0),0),0)
            elif compress>4:
                raster_algebra = Con(Raster(calc_files[("usal",1)]) >= compress,Con(Raster(calc_files[("usal",2)]) >= compress,\
                                Con(Raster(calc_files[("usal",3)]) >= compress,Con(Raster(calc_files[("al",2*compress)]) == 0,\
                                compress,Raster(calc_files[("al",2*compress)])),0),0),0)
            elif compress==4:
                raster_algebra = Con(Raster(calc_files[("usal",1)]) >= 4,Con(Raster(calc_files[("usal",2)]) >= 4,Con(Raster(calc_files[("usal",3)]) \
                                >= 4,Con(Raster(calc_files[("al",8)]) == 0,4,Raster(calc_files[("al",8)])),1),1),1)
            else:
                pass #no way to get here
            rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"al"+new_subdir[1:2]+source_in[2:-1]+"ok"+str(compress)
            #print rast_calc_name
            calc_files[("al",compress)]=rast_calc_name
            if not os.path.exists(rast_calc_name) or overwrite_data:
                print rast_calc_name
                raster_algebra.save(rast_calc_name)
        #making the "edge030606alf" type file
        rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"edge"+source_in[2:-1]+"al"+new_subdir[1:2]
        #print rast_calc_name
        calc_files[("edge")]=rast_calc_name # ("edge") is just the string "edge", not a tuple
        if not os.path.exists(rast_calc_name) or overwrite_data:
            print rast_calc_name
            raster_algebra = Con(IsNull(Raster(calc_files[("al",4)])),source_top+source_in+"special\\"+this_dir+"\\xx\\use"+source_in[:-1]+"v1")
            raster_algebra.save(rast_calc_name)
        if new_subdir[:2]=="za": #making the "n030606t42213" type file @@@@@@@@@@@@@@@@@@@@@ Needed to be expanded to detect gaps in any of the bands, not just B1
            key_list2=bigD[("defect","image_bands")].keys()
            key_list=[]
            for each_key in key_list2: # odd MTL.TIF file present in at least one Landsat7 dataset
                if len(each_key) in [2,3]:
                    key_list.append(each_key)
                    #key_list.remove(each_key) #removes oddballs occasionally present
            key_list.sort()
            arcpy.env.extent=defect_top+defect_in+defect_bands[key_list[len(key_list)-1]] #this should force extent to band B8 with normal nomenclature
            key_num=-1
            pool1=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"p"+source_in[2:-1]+new_subdir[:1]+defect_short[5:]+defect_short[2:4]
            pool2=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"q"+source_in[2:-1]+new_subdir[:1]+defect_short[5:]+defect_short[2:4]
            # pool1 accumulates, band by band, the index (key_num) of the first band that is null or 0 at each cell
            # NOTE(review): the pool saves below are not guarded by os.path.exists, unlike the other saves in this function
            for image_key in key_list:
                key_num+=1
                print pool1
                if key_num==0:
                    raster_algebra = Con(IsNull(Raster(defect_top+defect_in+defect_bands[image_key])),key_num,\
                                         Con(Raster(defect_top+defect_in+defect_bands[image_key])==0,key_num)) #likely to be B1
                    raster_algebra.save(pool1) #version handles null or 0
                else:
                    raster_algebra = Con(IsNull(Raster(pool1)),Con(IsNull(Raster(defect_top+defect_in+defect_bands[image_key])),key_num,\
                                         Con(Raster(defect_top+defect_in+defect_bands[image_key])==0,key_num)),Raster(pool1)) #likely to be higher names
                    raster_algebra.save(pool2) #version handles null or 0
                    raster_algebra = Raster(pool2)
                    raster_algebra.save(pool1) #keep rewriting pool
            arcpy.env.extent="DEFAULT" # reset back to normal default
        else:
            pool1=source_top+source_in+"special\\"+this_dir+"\\za\\p"+source_in[2:-1]+"z"+defect_short[5:]+defect_short[2:4]
            #only needed to make the fancy band definition one time, saving 5 minutes each subsequent cycle by reusing first one

        rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"n"+source_in[2:-1]+new_subdir[:1]+defect_short[5:]+defect_short[2:4]
        print rast_calc_name # need to improve formula to restrict results to just the good areas of the source raster
        calc_files[("n")]=rast_calc_name
        test=source_top+source_in+"special\\"+bigD[("source","better_upper")][(0,1,"4")] #this is mYYMMDDtest with values 0 to 7 for good bands
        if not os.path.exists(rast_calc_name) or overwrite_data: #this version is wrong because it only works for B1 and not all others
            raster_algebra = Con(Raster(test)>0,Con(Raster(pool1)>=0,Raster(calc_files[("al",4)]))) #version handles null or 0
            raster_algebra.save(rast_calc_name)
        #current version only builds the pc1 fixes, not the full set of all raw bands
        for compress in (64,32,16,8,4,1):
            #for the "pc1cutmi64" to 1 case to create the "fix"
            rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"fixpc1x"+str(compress)
            #print rast_calc_name
            calc_files[("fix",compress)]=rast_calc_name
            if not os.path.exists(rast_calc_name) or overwrite_data: # or fixme:
                print rast_calc_name
                if compress > 1:
                    raster_algebra = Con(Raster(calc_files[("n")]) == compress,Raster(source_top+source_in+"special\\"+this_dir+"\\xx\\pc1cutmi"+str(compress)))
                else: #compress ==1
                    raster_algebra = Con(Raster(calc_files[("n")]) == compress,Raster(source_top+source_in+"special\\pc"+source_in[2:-1]+"c1"))
                raster_algebra.save(rast_calc_name)
            #next the "pc030660c1" case to create the "orig"
            rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"origpc1"+new_subdir[:1]+str(compress)
            #print rast_calc_name
            calc_files[("orig",compress)]=rast_calc_name
            if not os.path.exists(rast_calc_name) or overwrite_data: # or fixme:
                print rast_calc_name
                raster_algebra = Con(Raster(calc_files[("n")]) == compress,Raster(source_top+source_in+"special\\pc"+source_in[2:-1]+"c1"))
                raster_algebra.save(rast_calc_name)
            #then the "tdifferx64" case to create the "differ"
            rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"differx"+str(compress)
            #print rast_calc_name
            calc_files[("differ",compress)]=rast_calc_name
            if not os.path.exists(rast_calc_name) or overwrite_data: # or fixme:
                print rast_calc_name
                raster_algebra = Raster(calc_files[("orig",compress)]) - Raster(calc_files[("fix",compress)])
                raster_algebra.save(rast_calc_name)
        #now the last few rasters ********************** THIS IS WHERE THE PROCESS IS NOT RIGHT!!!*************
        rast_calc_name5=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"differall"
        #print rast_calc_name5
        calc_files[("differ","all")]=rast_calc_name5
        if not os.path.exists(rast_calc_name5) or overwrite_data: # or fixme:
            print rast_calc_name5
            # take the first non-null 'differ' raster in the order 64, 32, 16, 8, 4 (compress 1 is not included)
            raster_algebra5 = Con(IsNull(Raster(calc_files[("differ",64)])),Con(IsNull(Raster(calc_files[("differ",32)])),\
                    Con(IsNull(Raster(calc_files[("differ",16)])),Con(IsNull(Raster(calc_files[("differ",8)])),Raster(calc_files[("differ",4)]),\
                    Raster(calc_files[("differ",8)])),Raster(calc_files[("differ",16)])),Raster(calc_files[("differ",32)])),\
                    Raster(calc_files[("differ",64)]))
            raster_algebra5.save(rast_calc_name5)
        rast_calc_name6=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"fixpc1ge4all"
        #print rast_calc_name6
        calc_files[("fix","all")]=rast_calc_name6
        if not os.path.exists(rast_calc_name6) or overwrite_data: # or fixme:
            print rast_calc_name6
            # same first-non-null merge as above, applied to the 'fix' rasters
            raster_algebra6 = Con(IsNull(Raster(calc_files[("fix",64)])),Con(IsNull(Raster(calc_files[("fix",32)])),\
                    Con(IsNull(Raster(calc_files[("fix",16)])),Con(IsNull(Raster(calc_files[("fix",8)])),Raster(calc_files[("fix",4)]),\
                    Raster(calc_files[("fix",8)])),Raster(calc_files[("fix",16)])),Raster(calc_files[("fix",32)])),\
                    Raster(calc_files[("fix",64)]))
            raster_algebra6.save(rast_calc_name6)
        rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"methodnum"
        #print rast_calc_name
        calc_files[("methodnum")]=rast_calc_name
        if not os.path.exists(rast_calc_name) or overwrite_data: # or fixme:
            print rast_calc_name
            if run_num>2:
                previous_run=source_top+source_in+"special\\"+this_dir+"\\"+subdir_list[run_num-2]+subdir_list[run_num-2][:1]+"fixpc1ge4all"
                previous_method=source_top+source_in+"special\\"+this_dir+"\\"+subdir_list[run_num-2]+subdir_list[run_num-2][:1]+"methodnum"
                calc_files["previous_run"]=previous_run
                calc_files["previous_method"]=previous_method
                print previous_run,str(run_num-1)
                ###haltnow=1/0
                raster_algebra = Con(IsNull(Raster(previous_run)),Con(Raster(calc_files[("fix","all")]) >=0,run_num-1),Raster(previous_method))
            elif run_num==2:
                raster_algebra = Con(Raster(calc_files[("fix","all")]) >=0,run_num-1)
            else:
                # NOTE(review): after this message the save below still runs, using whatever raster_algebra held from earlier code
                print "it was an error to get to line 754"
            raster_algebra.save(rast_calc_name)
        #now need to add the growing raster of which compression level to use
        rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"methodcomp"
        calc_files["methodcomp"]=rast_calc_name
        if new_subdir[:2]=="xx":
            # NOTE(review): raster_algebra is not reassigned here, so the save below would write the previous expression
            pass # may need to avoid doing anything in subdir 'xx'
        elif new_subdir[:2]=="za":
            raster_algebra = Con(Raster(calc_files["methodnum"]) == 1,Con(Raster(calc_files["n"]) > 1,Raster(calc_files["n"])))
        else:
            prev_subdir=subdir_list[run_num-2]
            print source_top+source_in+"special\\"+this_dir+"\\"+prev_subdir+prev_subdir[:1]+"methodcomp"
            raster_algebra = Con(Raster(calc_files["methodnum"]) == run_num-1,Con(Raster(calc_files["n"]) > 1,Raster(calc_files["n"])),\
                                 source_top+source_in+"special\\"+this_dir+"\\"+prev_subdir+prev_subdir[:1]+"methodcomp")
        if not os.path.exists(rast_calc_name) or overwrite_data: # or fixme:
            raster_algebra.save(rast_calc_name)
        #NEWLY added to retain highest reliability versions as each less conservative condition is incorporated
        rast_calc_name7=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"pixpc1ge4all"
        rast_calc_name8=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+new_subdir[:1]+"misserall1" #better than differall
        calc_files[("pixpc1ge4all")]=rast_calc_name7
        calc_files[("misserall1")]=rast_calc_name8
        calc_files[("misser","all","1")]=rast_calc_name8 # this is the key std_stats reads
        if not os.path.exists(rast_calc_name7) or not os.path.exists(rast_calc_name8) or overwrite_data: # or fixme:
            print rast_calc_name7," and ",rast_calc_name8
            if run_num>2:
                previous_pix=source_top+source_in+"special\\"+this_dir+"\\"+subdir_list[run_num-2]+subdir_list[run_num-2][:1]+"pixpc1ge4all"
                previous_miss=source_top+source_in+"special\\"+this_dir+"\\"+subdir_list[run_num-2]+subdir_list[run_num-2][:1]+"misserall1"
                calc_files["previous_pix"]=previous_pix
                calc_files["previous_miss"]=previous_miss
                print previous_pix,str(run_num-1)," and ",previous_miss
                ###haltnow=1/0
                # keep the previous run's value where it exists, fill the remaining cells from this run
                raster_algebra7 = Con(IsNull(Raster(previous_pix)),Raster(rast_calc_name6),Raster(previous_pix))
                raster_algebra8 = Con(IsNull(Raster(previous_miss)),Raster(rast_calc_name5),Raster(previous_miss))
            elif run_num==2: # just do a simple copy for subdir 'za' case to start the process
                raster_algebra7 = Raster(rast_calc_name6)
                raster_algebra8 = Raster(rast_calc_name5)
            else:
                # NOTE(review): raster_algebra7/8 are never assigned on this path, so the saves below would raise NameError
                print "it was an error to get to line 791"
            raster_algebra7.save(rast_calc_name7)
            raster_algebra8.save(rast_calc_name8)
        #STUFF ABOVE WAS WHERE THE MIX OF BEST PREVIOUS PLUS NEW CASE PIXELS MUST BE MERGED FOR BOTH VALUES AND DIFFERENCES WITH RAW DATA
        bigD=std_stats(calc_files,run_num-1,bigD) #'Qdifferpc1ge44' has the counts and values needed to determine standard deviations for this round of fixing SLC gaps
    elif run_num==52:
        rast_calc_name=source_top+source_in+"special\\"+this_dir+"\\"+new_subdir+"missergall"
        print rast_calc_name
        calc_files[("misser","all")]=rast_calc_name
        print "got to line 800"
        print bigD["where2start"]
        bigD["where2start"]=51
    else: # no longer needed to handle the final case closing out the process
##        print "got to line 804"
##        print bigD["where2start"]
        pass
##        bigD["where2start"]=51 #????
    #and still need to deal with the full set of all raster bands (and maybe also the next couple of principal components
    #the various extra items will force some added calcs even in the already done subdirectories
    #haltnow=1/0
    ##bigD=std_stats(calc_files,run_num,bigD) #'Qdifferpc1ge44' has the counts and values needed to determine standard deviations for this round of fixing SLC gaps
    bigD["calc_files"]=calc_files
    return bigD #end of next_stats function - what will need to be returned as values?

def std_stats(calc_files,run_num_includesXX0,bigD): # still being built
    currVATdict_in={}
    currVATdict_out={}
    accumulator={}
    se_file=calc_files[("misser","all","1")]
    bigD["calc_files"]=calc_files
    success,currVATdict_out=VAT_reader(currVATdict_in,se_file,1,"csv",1) # only passing two parameters, using default for remainder
    entry_count=0 #compressed index to simplify later analysis
    value_sum=0 #a_key
    count_sum=0 #currVATdict_out
    prod_sum=0 #their product
    SS=0
    print "\nat line 827 in 'std_stats' function"
    if success:
        se_file_key=currVATdict_out.keys()
        se_file_key.sort() #possibly in order even before the sort()
        for a_key in se_file_key:
            entry_count+=1
            this_value=a_key
            if a_key>2**31: this_value=a_key-(2**32) # convert 2s complement back to standard negative number
            accumulator[("value",entry_count)]=this_value
            ##if entry_count<10 or entry_count>450: print str(a_key)
            accumulator[("count",entry_count)]=currVATdict_out[a_key]
            accumulator[("product",entry_count)]=this_value*currVATdict_out[a_key]
            value_sum+=this_value
            count_sum+=currVATdict_out[a_key]
            prod_sum+=this_value*currVATdict_out[a_key]
        print "subdirectory number ",str(run_num_includesXX0),"stored in: ",se_file[-14:-12] #this is wrong for cleanup #51
        print "length of accumulator dictionary is ",str(len(accumulator))    
        print "entry_count =",str(entry_count) #OK
        print "value_sum =",str(value_sum) #problem probably was format for negative values
        print "count_sum =",str(count_sum) #OK
        print "prod_sum =",str(prod_sum)
        raw_mean=(1.0*prod_sum)/(1.0*count_sum)
        print "weight mean value =",str(raw_mean),"for file:",se_file
        for sequential in range(1,1+entry_count):
            SS+=accumulator[("count",sequential)]*(float(accumulator[("value",sequential)])-raw_mean)**2
            #possibly also later on do some version of distribution tail clipping to produce 'closer to truly normal' data
        variance_val=1.0*SS/(1.0*count_sum)
        std_dev=variance_val**0.5
        print "SS =",str(SS)
        print "variance =",str(variance_val)
        print "standard deviation =",str(std_dev)
        summary_filename=se_file+"_summary.csv"
        fileout=open(summary_filename,"w")
        fileout.write("source,value\n")
        fileout.write("input_filename = ,"+se_file+"\n")
        fileout.write("number of rows = ,"+str(entry_count)+"\n")
        fileout.write("pixel count sum = ,"+str(count_sum)+"\n")
        fileout.write("weighted valueXcount sum = ,"+str(prod_sum)+"\n")
        fileout.write("raw_mean = ,"+str(raw_mean)+"\n")
        fileout.write("sums of squares = ,"+str(SS)+"\n")
        fileout.write("variance = ,"+str(variance_val)+"\n")
        fileout.write("standard deviation = ,"+str(std_dev)+"\n")
        fileout.close()
    else:
        print "failed to read VAT file"
        haltnow=1/0
    pass
    return bigD #end of 'std_stats'

def validate_current(bigD):
    #def validate_current(source_top,source_in,lower_files,preexisting_subdirs,defect_short):    #status_current,where2start,final_finished,this_dir=validate_current(source_top,source_in,lower_files,existing_subdirs,defect_shortname)
    #first must find the most recently created directory in special/statYYYYMMDD/ series (xx,za,yb,wc,vd,ue...)
    #lower_files[(stat_dir,deep_case,designs,a_band_size,compress_levels[compressing])]=this_nameQR
    source_top=bigD["s_top"]
    source_in=bigD["s_in"]
    lower_files=bigD[("source","better_lower")] #should be better_lower rather than lower_files
    preexisting_subdirs=bigD["good_subdirs"]
    defect_short=bigD["d_short"]
    ##bigD["expect_num"]=46 #a finished case will have 46 GRID files but only 44 in better_list so far (47 subdirectories including the INFO before more workfiles were added
    expect_num=bigD["expect_num"] #now setting in the original global definition of preexisting bigD items
    bigD["incomplete"]=-1 # to avoid having it reset earlier and then carry over
    print "\nnow running 'validate_current' function at line 888"
    final_dir_full=True #False #default start value
    this_dir=""
    this_case=""
    GRID_count=0
    stat_dir=""
    subdir_list=sub_dir_naming() #dictionary format is {0:"xx\\",1:"za\\",2:"yb\\",...} #wc, vd, ue etc.
    #print preexisting_subdirs
    invert_directory={}
    for counting in range(52):
        this_sub_key=subdir_list[counting][:-1]
        invert_directory[this_sub_key]=counting
    print invert_directory
    bigD["invert"]=invert_directory
    lower_keys=lower_files.keys()
    check_keys=preexisting_subdirs.keys()
    check_keys.sort()
    highest_possibly_good_case=0
    for verify in check_keys:
        if verify!=highest_possibly_good_case:
            #highest_possibly_good_case+=-1
            break # the subdir indexed as verify occurs after some missing subdir
        else: #verify was equal to test values
            highest_possibly_good_case+=1
    print "the highest contiguous subdir that already exists was ",str(highest_possibly_good_case)     
    #highest_number=len(preexisting_subdirs)#point to adjust in order to force finishing up of older directories
    file_count=0
    full_counter={}
    print "number of entries in dictionary = ",str(len(lower_keys))
    for subdir_count in range(len(subdir_list)): #making file counter for sub directory contents
        full_counter[("subdir",subdir_count)]=0 #set all possible entries to 0
        #print "subdir",str(subdir_count),subdir_list[subdir_count]
    lower_keys.sort()
    for a_key in lower_keys:
        #print a_key
        if len(a_key)==5:
            (stat_dir,deep_case_dir,designs,band_size,compress)=a_key
        else:
            print str(len(a_key))
            pass # no other options to make
        #print deep_case_dir #,preexisting_subdirs[highest_number]
        if len(deep_case_dir)==2:
            deep_case_number=invert_directory[deep_case_dir]
        elif len(deep_case_dir)==3:
            shorter=deep_case_dir[:-1]
            deep_case_number=invert_directory[shorter]
        else:
            print deep_case_dir
            print str(len(deep_case_dir))
            print "wrong directory name size"
        full_counter["subdir",deep_case_number]+=1 # even do the highest number case checked separately in the next line
        if deep_case_dir==preexisting_subdirs[highest_possibly_good_case-1]:
            file_count+=1
            this_dir=stat_dir
            this_case=deep_case_dir
            print a_key,lower_files[a_key] #this is going correctly
    print str(len(full_counter))
    bigD["stat_dir"]=stat_dir # added 12/19/17
    print "number of preexisting subdirs at line 936 is",str(len(preexisting_subdirs))
    print preexisting_subdirs
    for subdir_count in range(highest_possibly_good_case): #len(preexisting_subdirs)): #making file counter for sub directory contents
        ##output_dir[(stat_dir,deep_case,designs,a_band_size,compress_levels[compressing])]
        if subdir_list[subdir_count][:-1]!="xx": # skip that case of the first directory to load with various compressions
            #print "for full_counter =",str(full_counter[("subdir",subdir_count)]),"in subdir_count =",str(subdir_count)
            index=(stat_dir,preexisting_subdirs[subdir_count],21,1,"4")
            if index in bigD[("source","better_lower")].keys():
                diff_filename=bigD[("source","better_lower")][index]
                #print diff_filename
                summary_filename=source_top+source_in+"special\\"+index[0]+"\\"+index[1]+"\\"+diff_filename[:1]+"miss"+\
                                  diff_filename[5:]+"1_normalize.csv" #needed to check for its existence                
                #summary_filename=source_top+source_in+"special\\"+index[0]+"\\"+index[1]+"\\"+diff_filename+"_normalize.csv" #needed to check for its existence
                #print summary_filename," was the missing file in validate search on line 949"
            else: #file with name like 'differgall' does not current exist
                print "index fails for ",stat_dir,preexisting_subdirs[subdir_count],str(21),str(1),"4"
                bigD["incomplete"]=subdir_count+1 # offset needed to trigger stats properly
                break
            if not os.path.exists(summary_filename) or (bigD["overwrite_summaries"] and bigD["current_fix"]<=subdir_count) : #new code to check for presence of normalize.csv summary file
                print "missing the final summary file",summary_filename
                print os.path.exists(summary_filename)
                bigD["incomplete"]=subdir_count+1 #lost track of when this +1 adjustment is needed
                break 
            if full_counter[("subdir",subdir_count)]<expect_num:
                if subdir_count<(len(preexisting_subdirs)-1):
                    print "unfinished calculations in subdir number",str(subdir_count),subdir_list[subdir_count],\
                          str(full_counter[("subdir",subdir_count)])
                    bigD["incomplete"]=subdir_count+1 # offset needed to trigger stats properly
                    break
                else:
                    print "okay at line 966 with subdir_count =",str(subdir_count)
                    print str(full_counter[("subdir",subdir_count)])
                    print "length of full_counter =",str(len(full_counter))
        else: #the 'xx' subdir case
            print "subdir_count",str(subdir_count) #subdir_count is 0 when it should be more early on in cycle
            print "subdir in question at line 971",subdir_list[subdir_count]        
    ##haltnow=1/0
    if bigD["incomplete"]>0:
        bigD["status_current"]=True #manual version needs to be replaced
        bigD["where2start"]=bigD["incomplete"]
        bigD["final_dir_full"]=False
        print "at line 977, 'incomplete' = ",str(bigD["incomplete"])
        pass
    elif bigD["incomplete"]==0:
        print "at line 980, 'incomplete' = ",str(bigD["incomplete"])
        pass
    else: #regular case of everything finished up going into the final directory
        try:
            print preexisting_subdirs[highest_possibly_good_case]
        except:
            print preexisting_subdirs[highest_possibly_good_case-1]
        print "file count at line 987 = ",str(file_count)
        print "at line 988, 'incomplete' = ",str(bigD["incomplete"])
        if file_count>=expect_num or ((file_count>=18) and (highest_possibly_good_case==1)): #could also a test for use030606c1 presence
            final_dir_full=True
            print str(file_count)," for 'file_count' is reason for setting 'final_dir_full' = True at line 991"
        print "file_count =",str(file_count)
        if this_dir=="": this_dir="stat"+defect_short
        print "might be already done with subdir number: "+str(highest_possibly_good_case-1)+" which is "+preexisting_subdirs[highest_possibly_good_case-1]
        print source_top+source_in+"special\\"+this_dir+"\\"+this_case
        if file_count==0:
            print "starting up of new subdir with nothing in it yet"
            final_dir_full=False
        else:
            this_list=os.listdir(source_top+source_in+"special\\"+this_dir+"\\"+this_case)
            this_list.sort()
            this_length=len(this_list) #need to limit to true subdirectories leaving out the .xml files
            print this_length
            if (this_length<2*expect_num+1) and not ((file_count>=18) and (highest_possibly_good_case==1)): #last test is for starting new case at 'za'
                print "not enuf files of GRID type, only "+str((this_length-1.0)/2.0)
                for entry in this_list:
                    if os.path.isdir(source_top+source_in+"special\\"+this_dir+"\\"+this_case+"\\"+entry):
                        GRID_count+=1
                        print entry
                print str(GRID_count)
        print "at end of 'validate_current' this_length = ",str(this_length)," with subdir highest_number = ",str(highest_possibly_good_case)
        if bigD["repair"][0]:
            highest_possibly_good_case=bigD["repair"][1]
        bigD["status_current"]=True #manual version needs to be replaced
        bigD["where2start"]=highest_possibly_good_case #changing name from highest_number to where2start
        bigD["final_dir_full"]=final_dir_full
    bigD["this_dir"]=this_dir
    print len(full_counter)
    print full_counter
    #haltnow=1/0
    #status_current,where2start,final_finished,this_dir=validate_current
    #return True,highest_number,final_dir_full,this_dir #end of validate_current function
    return bigD #end of 'validate_current' function

def validate_start(bigD):
    #def validate_start(source_top_dir,source_inner_dir,upper_file_list,defect8charname):
    #status_start,subdir_good,existing_subdirs=validate_start(source_top,source_in,upper_files,defect_shortname)
    #this just verifies that files exist, not that their contents are good!
    #need to add way to accept subdir 'xx' without insisting on full sets of files
    source_top_dir=bigD["s_top"]
    source_inner_dir=bigD["s_in"]
    upper_file_list=bigD[("source","upper_files")]
    defect8charname=bigD["d_short"]
    print "\n'validate_start' function running at line 1034"
    requirements={0:(1,1),1:(7,1),2:(1,1),3:(7,1),4:(3,5),5:(3,5),6:(3,5),7:(3,5),8:(3,5)} #designs:(bands,compressions)
    compressions={1:"4",2:"8",3:"16",4:"32",5:"64"}
    subdir_list=sub_dir_naming() #dictionary format is {0:"xx\\",1:"za\\",2:"yb\\",...} #wc, vd, ue etc.
    good_subdirs={}
    these_subdirs=[]
    all_good=True
    subdir_good=True
    file_keys=upper_file_list.keys()
    designs=requirements.keys()
    designs.sort()
    for a_design in designs:
        band_num,compress_num=requirements[a_design]
        for a_band in range(1,band_num+1):
            for a_compress_num in range(1,compress_num+1):
                if compress_num>1:
                    compress = compressions[a_compress_num]
                else:
                    compress = "no_more"
                if band_num>1:
                    index=(a_design,str(a_band),compress)
                else:
                    index=(a_design,"nothing_else",compress)
                if index not in file_keys:
                    all_good=False
                    print "key entry is missing for this index:"
                    print index
                else:
                    pass #could try a file access function of some sort to verify things are OK
    if not all_good: print upper_file_list
    for sub_dir_counts in range(52):
        test_index=("statistics","stat"+defect8charname,sub_dir_counts)
        #print test_index
        if test_index in file_keys: these_subdirs.append(upper_file_list[test_index]) # create list of all subdirs currently present
    max_good=len(these_subdirs)-1
    for recheck in range(max_good+1): #this just checks for the existence of the subdirectory, not its contents
        if subdir_list[recheck][:-1] in these_subdirs:
            good_subdirs[recheck]=subdir_list[recheck][:-1]
        else:
            print "subdir list error for case "+str(recheck)+" which was "+subdir_list[recheck]
            subdir_good=False #********* this is a pretty useless reporting variable
    if not subdir_good: print these_subdirs
    print str(len(good_subdirs))
    print all_good
    print subdir_good
    bigD["all_good"]=all_good
    bigD["subdir_good"]=subdir_good
    bigD["good_subdirs"]=good_subdirs
    ###if len(good_subdirs)<=1: bigD=builder(bigD) #new function that creates various items needed from new mask
    #return all_good,subdir_good,good_subdirs #end of validate_start function
    return bigD #end of 'validate_start' function

def validate(bigD):
    """Run the start-up checks, building the first subdirectories when needed, then the resume check.

    'builder' is run when at most one good subdirectory is known or when
    bigD["builder_has_run"] is false; afterwards the start check and 'get_targets'
    are repeated so the new rasters are registered.

    Returns bigD as updated by 'validate_current'.
    """
    #older version returned status_start,subdir_good,status_current,where2start,final_finished,this_dir
    bigD=validate_start(bigD)
    already_set_up=len(bigD["good_subdirs"])>1 and bigD["builder_has_run"]
    if not already_set_up:
        bigD=builder(bigD) #new function that creates various items needed from new mask
        bigD["builder_has_run"]=True
        bigD=validate_start(bigD) #run it again if really just at the start
        bigD=get_targets(bigD,"source")
    return validate_current(bigD) # end of 'validate' function

def builder(bigD):
    """Create the 'stat<defect>' directory with its 'xx' and 'za' subdirectories and fill 'xx'.

    Reads from bigD: "output_cutoffs", "s_top", "s_in", "d_short", "d_top", "d_in",
    ("source","better_upper"), ("defect","image_bands") and "overwrite_data".
    Existing rasters are kept unless bigD["overwrite_data"] is true.

    Returns the bigD handed back by get_targets(bigD,"source").
    """
    #only called on at start of a new defect mask case
    #fixme2=False #True handled a very local trigger during repairs
    print "\nnow running 'builder' at line 1103"
    cutoff_list=bigD["output_cutoffs"]
    build_me1=bigD["s_top"]+bigD["s_in"]+"special\\stat"+bigD["d_short"]
    if not os.path.exists(build_me1): os.mkdir(build_me1)
    build_me2=build_me1+"\\xx"
    if not os.path.exists(build_me2): os.mkdir(build_me2)
    #####extra=bigD["extras"] # null "" if not needed, otherwise "extras\\"
    #start with cutting the good data with mask from defect file
    #making the "n030606t42213" type file
    #pc_list={}
    # step 1: for bands 1-3 keep the 'pc...c<band>' raster only where defect band B1 is > 0
    for bands in range(1,4):
        #rast_calc_name=build_me2+"\\n"+str(bands)+bigD["s_in"][2:-1]+"x"+bigD["d_short"][5:]+bigD["d_short"][2:4]
        rast_calc_name=build_me2+"\\x"+str(bands)+bigD["s_in"][2:-1]+bigD["d_short"][5:]+bigD["d_short"][2:4]
        print rast_calc_name # need to improve formula to restrict results to just the good areas of the source raster
        #calc_files[("n")]=rast_calc_name
        # 'test' is built here but not used anywhere else in this function
        test=bigD["s_top"]+bigD["s_in"]+"special\\"+bigD[("source","better_upper")][(0,1,"4")] #this is mYYMMDDtest with values 0 to 7 for good bands
        #test=bigD["s_top"]+bigD["s_in"]+"special\\"+bigD[("source","better_upper")][(0,1,"4")] #this is mYYMMDDtest with values 0 to 7 for good bands
        pc=bigD["s_top"]+bigD["s_in"]+"special\\pc"+bigD["s_in"][2:8]+"c"+str(bands)
        print pc
        if not os.path.exists(rast_calc_name) or bigD["overwrite_data"]: # or fixme2:
            #print test ############  MIGHT ALSO NEED TO BE MODIFIED TO WORK WITH FULL SIZE GAPS !!!!!!!!!!!!!!!!!!!!!!!!!!
            raster_algebra = Con(Raster(bigD["d_top"]+bigD["d_in"]+bigD[("defect","image_bands")]["B1"]) > 0,pc) 
            raster_algebra.save(rast_calc_name)
    # step 2: per band, build the block-mean, integer, 'selx' and 'use' rasters from the step 1 output
    for band in range(1,4): # to also run pc2 and pc3 instead of just pc1
        bnd = str(band)
        inRaster=build_me2+"\\x"+bnd+bigD["s_in"][2:-1]+bigD["d_short"][5:]+bigD["d_short"][2:4]
        ##inRaster=build_me2+"\\x1"+bigD["s_in"][2:-1]+bigD["d_short"][5:]+bigD["d_short"][2:4]
        for compress in (4,8,16,32,64):
            # 'pc<band>cutmn<compress>': MEAN block statistics over compress x compress cells
            rast_calc_name=build_me2+"\\pc"+bnd+"cutmn"+str(compress)
            print rast_calc_name
            nbr = NbrRectangle(compress,compress,"CELL") #CELL versus MAP units
            if not os.path.exists(rast_calc_name) or bigD["overwrite_data"]: # or fixme2:
                raster_algebra=BlockStatistics(inRaster, nbr, "MEAN", "DATA")
                raster_algebra.save(rast_calc_name)
            # 'pc<band>cutmi<compress>': the block-mean raster passed through Int()
            rast_calc_name2=build_me2+"\\pc"+bnd+"cutmi"+str(compress)
            print rast_calc_name2
            if not os.path.exists(rast_calc_name2) or bigD["overwrite_data"]: # or fixme2:
                raster_algebra=Int(rast_calc_name)
                raster_algebra.save(rast_calc_name2)
        #probably only need to run for band1 - now recognizing need for pc2 and pc3
        calc_files={} # restarted for every band; only read again inside this iteration
        for compress in (4,8,16,32,64): #need to make "sel_c1out4,8,16,32,64" in order to make use030606c1 in order to make 'edge' raster
            index=(8,str(1),str(compress)) # not used further in this function
            se_file_name=bigD["s_top"]+bigD["s_in"]+"special\\p"+bigD["s_in"][2:8]+"c"+bnd+"se"+str(compress)
            #print this_dir
            rast_calc_name=build_me2+"\\selx_c"+bnd+"out"+str(compress) #similar names present in source_lower for previous subdirs
            #print rast_calc_name
            calc_files[("sel",band,compress)]=rast_calc_name
            if not os.path.exists(rast_calc_name) or bigD["overwrite_data"]: # or fixme2:
                print rast_calc_name
                # cell value is 'compress' where the 'se' raster is at or below the cutoff, otherwise 0
                raster_algebra = Con(Raster(se_file_name) <= cutoff_list[(band,compress,0)], compress, 0)
                raster_algebra.save(rast_calc_name)
        rast_calc_name=build_me2+"\\use"+bigD["s_in"][:8]+"v"+bnd
        print rast_calc_name
        #calc_files[("usal",bands)]=rast_calc_name
        if not os.path.exists(rast_calc_name) or bigD["overwrite_data"]: # or fixme2:
            # largest compression whose 'selx' raster is > 0 wins (64 tested first, then 32, 16, 8, 4); 1 if none is
            raster_algebra = Con(Raster(calc_files[("sel",band,64)]) > 0, 64, Con(Raster(calc_files[("sel",band,32)]) > 0, 32, \
                            Con(Raster(calc_files[("sel",band,16)]) > 0, 16, Con(Raster(calc_files[("sel",band,8)]) > 0, 8, \
                            Con(Raster(calc_files[("sel",band,4)]) > 0, 4, 1)))))   
            raster_algebra.save(rast_calc_name)
        # not sure if more files need to be built in subdir 'xx'    
    #haltnow=1/0
    # step 3: make sure the next subdirectory 'za' exists, then refresh the file dictionaries
    build_me3=build_me1+"\\za"
    if not os.path.exists(build_me3): os.mkdir(build_me3)
    print "now running 'get_targets' one extra time"
    bigD=get_targets(bigD,"source") #trying to capture missing newly built rasters
    #bigD=validate_start(bigD) #bad idea to call this
    #bigD["final_dir_full"]=True #not sure if it really is True, but trying that status after the Block Stats were run
    return bigD #end of 'builder' function

def first_things_first(sys_arguments):
    """Report the command-line arguments the script was started with.

    sys_arguments -- sequence laid out like sys.argv (entry 0 is the script name)

    Returns None; the function only prints.
    """
    # single-argument print(...) behaves the same as the print statement used elsewhere in this file
    if len(sys_arguments)==1:
        print("running without any commands passed in from os while starting\n")
    else:
        print("the following sys.argv values were passed in:")
        # FIX: the old loop printed the positions 1,2,3... instead of the argument values
        for passed_value in sys_arguments[1:]: print(passed_value)
        print("")
    #end of 'first_things_first' function

def wrap_up(bigD):
    """Build the closing 'zz' subdirectory rasters and run the last 'std_stats' pass.

    Reads from bigD: "good_subdirs", "where2start", "s_top", "s_in", ("source","upper_files"),
    ("source","lower_files"), "stat_dir", ("defect","image_bands"), "d_top", "d_in",
    ("source","better_upper") and "overwrite_data".
    Writes to bigD: "calc_files" (replaced by the dictionary built here) and
    "good_subdirs" (with entry 51 set to "zz").

    The rasters are only built when bigD["where2start"] >= 51; 'std_stats' is only
    called when it is exactly 51.

    Returns bigD.
    """
    preexisting_subdirs=bigD["good_subdirs"]
    run_num=bigD["where2start"] #what_is_next in originating code
    print "\nin 'wrap_up' at line 1184 with run_num = ",str(run_num)
##    final_dir_finished=bigD["final_dir_full"]
    source_top=bigD["s_top"]
    source_in=bigD["s_in"]
    upper_files=bigD[("source","upper_files")]
    lower_files=bigD[("source","lower_files")] # not used further in this function
    #this_dir=bigD["this_dir"]#plan on removing and replacing with 'stat_dir'
    stat_dir=bigD["stat_dir"]
    images=bigD[("defect","image_bands")]
    #fixme=bigD["fixme"]
    calc_files={}
    ##better_upper=bigD[("source","better_upper")] #need to index to pc030606c1
    defect_top=bigD["d_top"]
    defect_in=bigD["d_in"]
##    defect_short=bigD["d_short"]
##    defect_bands=bigD[("defect","image_bands")]
##    cutoff_list=bigD["output_cutoffs"] # this was the wrong piece of the related dictionaries, fixed now
    test=source_top+source_in+"special\\"+bigD[("source","better_upper")][(0,1,"4")] #this is mYYMMDDtest with values 0 to 7 for good bands
    overwrite_data=bigD["overwrite_data"] #temporary version just hard coding value inside of function
    if not os.path.exists(source_top+source_in+"special\\"+stat_dir+"\\zz\\"):
        os.mkdir(source_top+source_in+"special\\"+stat_dir+"\\zz\\")
        print "JUST NOW CREATING THE 'zz' SUBDIRECTORY AT LINE 1205"
    preexisting_subdirs[51]="zz" #force this in case it hadn't been made
    rast_calc_name=source_top+source_in+"special\\"+stat_dir+"\\zz\\"+"methodcomp99"
    print rast_calc_name
    calc_files[("methodcomp99")]=rast_calc_name # the key is the plain string "methodcomp99" (parentheses alone do not make a tuple)
    #print rast_calc_name
    # input paths f1-f8: each one is printed together with whether it exists
    f1=source_top+source_in+"special\\"+upper_files[(3,"1","no_more")] #pc030606c1
    print f1
    print os.path.exists(f1)
    #print images
    f2=defect_top+defect_in+images["B1"] # was wrong image from wrong date!
    print f2
    print os.path.exists(f2)
    f3=source_top+source_in+"special\\"+stat_dir+"\\bz\\"+"bmethodnum"
    print f3
    print os.path.exists(f3)
    f35=source_top+source_in+"special\\"+stat_dir+"\\bz\\"+"bmethodcomp"
    print f35
    print os.path.exists(f35)
    print source_top+source_in+"special\\"+stat_dir+"\\xx\\"
    #print lower_files[0,74] #this is Jan. 3, 2018 crash site - correct indexes [0,2to6] but not needed
    f4=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc1cutmi4" #lower_files[0,75][2] #32 from 74,75
    print f4
    print os.path.exists(f4)
    f5=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc1cutmi8" #lower_files[0,72][2] #64 from 76,72
    print f5
    print os.path.exists(f5)
    f6=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc1cutmi16" #lower_files[0,73][2] #8 from 72,73
    print f6
    print os.path.exists(f6)
    f7=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc1cutmi32" #lower_files[0,74][2] #16 from 73,74
    print f7
    print os.path.exists(f7)
    f8=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc1cutmi64" #lower_files[0,76][2] #4 from 75,76
    print f8
    print os.path.exists(f8)
    if run_num>=51:
        print "now creating ",rast_calc_name," in nominal run_num = ",str(run_num) # Con(Raster(f2)==0,
        if not os.path.exists(rast_calc_name) or overwrite_data:
            # 'methodcomp99': where test>0 and f1>=0, and f2 is NoData or 0, and f3 is NoData, take the
            # smallest compression whose 'pc1cutmi' raster has data (f4->4, f5->8, f6->16, f7->32), else 64;
            # f8 is not tested here and no value is given where f3 already has data
            raster_algebra = Con(Raster(test)>0,Con(Raster(f1)>=0,Con(IsNull(Raster(f2)),Con(IsNull(Raster(f3)),Con(IsNull(Raster(f4)),\
                        Con(IsNull(Raster(f5)),Con(IsNull(Raster(f6)),Con(IsNull(Raster(f7)),64,32),16),8),4))),Con(Raster(f2)==0,\
                        Con(IsNull(Raster(f3)),Con(IsNull(Raster(f4)),Con(IsNull(Raster(f5)),\
                        Con(IsNull(Raster(f6)),Con(IsNull(Raster(f7)),64,32),16),8),4)))))
            raster_algebra.save(rast_calc_name)
        # 'methodnum99': constant 99 wherever 'methodcomp99' is >= 1
        rast_calc_name2=rast_calc_name[:-6]+"num99" #this file was OK, no need to recreate
        print "now creating ",rast_calc_name2
        if not os.path.exists(rast_calc_name2) or overwrite_data:
            raster_algebra = Con(Raster(rast_calc_name)>=1,99)
            raster_algebra.save(rast_calc_name2)
        # 'methodnum': f3 ('bz\bmethodnum') where it has data, otherwise 'methodnum99'
        rast_calc_name3=rast_calc_name2[:-2] # making filename "methodnum"
        print "now creating ",rast_calc_name3
        if not os.path.exists(rast_calc_name3) or overwrite_data: # or fixme: #not sure if fixme is still required here
            raster_algebra = Con(IsNull(Raster(f3)),Raster(rast_calc_name2),Raster(f3))
            raster_algebra.save(rast_calc_name3)
        # 'methodcomp': where 'methodnum' > 0, f35 ('bz\bmethodcomp') where it has data, otherwise 'methodcomp99'
        rast_calc_name35=rast_calc_name3[:-3]+"comp" #failed to include this is in first version - 3 lines new code
        print "now creating ",rast_calc_name35
        if not os.path.exists(rast_calc_name35) or overwrite_data: # or fixme:
            raster_algebra = Con(Raster(rast_calc_name3)>0,Con(IsNull(Raster(f35)),Raster(rast_calc_name),Raster(f35)))
            raster_algebra.save(rast_calc_name35)
        rast_calc_name4=source_top+source_in+"special\\"+stat_dir+"\\zz\\"+"pixpc1ge4all" #worried this is not giving right values
        f9=source_top+source_in+"special\\"+stat_dir+"\\bz\\"+"bpixpc1ge4all" #name change from 'fix' to 'pix'
        #f10=calc_files[("methodcomp99")] # mistake is using f10, really need to create a final methodcomp that merged bz/bmethodcomp&methodcomp99
        f10=rast_calc_name35
        print "now creating ",rast_calc_name4
        if not os.path.exists(rast_calc_name4) or overwrite_data: # or fixme: Con(Raster(f2)==0,
            print "really doing this at line 1270"
            # 'pixpc1ge4all': where f2 is NoData or 0, keep f9 where it has data; otherwise take the
            # 'pc1cutmi' raster (f4-f8) whose compression equals the 'methodcomp' value in f10
            raster_algebra = Con(IsNull(Raster(f2)),Con(IsNull(Raster(f9)),Con(Raster(f10)==4,Raster(f4),Con(Raster(f10)==8,Raster(f5),\
                        Con(Raster(f10)==16,Raster(f6),Con(Raster(f10)==32,Raster(f7),Con(Raster(f10)==64,Raster(f8)))))),Raster(f9)),\
                        Con(Raster(f2)==0,Con(IsNull(Raster(f9)),Con(Raster(f10)==4,Raster(f4),Con(Raster(f10)==8,Raster(f5),\
                        Con(Raster(f10)==16,Raster(f6),Con(Raster(f10)==32,Raster(f7),Con(Raster(f10)==64,Raster(f8)))))),Raster(f9))))
            raster_algebra.save(rast_calc_name4) #for some reason differs from equivalent statement in ArcMap RasterCalc
        # 'missergall1': f1 minus 'pixpc1ge4all'; this is the raster 'std_stats' reads
        rast_calc_name5=rast_calc_name4[:-12]+"missergall1" #RENAME TO differgall1
        print "now creating ",rast_calc_name5
        calc_files[("misser","all","1")]=rast_calc_name5
        if not os.path.exists(rast_calc_name5) or overwrite_data: # or fixme: #eliminated most of the other "fixme"
            raster_algebra = Raster(f1) - Raster(rast_calc_name4)
            raster_algebra.save(rast_calc_name5)
    bigD["calc_files"]=calc_files
    bigD["good_subdirs"]=preexisting_subdirs #added to handle the final wrapup
    if run_num==51:
        # the return value is not captured here
        std_stats(calc_files,51,bigD) #should make last std_error calculation file for building of mapping(run_num_includesXX0)
    else:
        # reached for every run_num other than 51, not only for 52
        print "in bogus attempt at run_num = 52, not a real case"
    return bigD #end of 'wrap_up'
            
def normalize(bigD):
    print "\nNow running 'normalize' at line 1291"# need to change to handle pc2 and pc3 instead of just single pc1
    currVATdict_in={}
    current_fix=bigD["current_fix"]
    test_keys=bigD["calc_files"].keys()
    if ("misser","all") in test_keys:
        diff_fileX=bigD["calc_files"][("misser","all")]#this created in next_stats - old was bigD["calc_files"][("misser","all","1")]
    elif ("misser","all","1") in test_keys:
        diff_fileX=bigD["calc_files"][("misser","all","1")]#this created in do_pc2or3
    elif ("misser","all","2") in test_keys:
        diff_fileX=bigD["calc_files"][("misser","all","2")]#this created in do_pc2or3
    else:
        print "fatal error on line 1302"
        print test_keys
        haltnow=1/0
    for bands in ("1"): #,"2","3"): #new version running all three bands PC1, PC2, PC3 instead of just pc1
        diff_file=diff_fileX[:-1]+bands
        #diff_file=bigD["s_top"]+bigD["s_in"]+"special\\stat"+bigD["d_short"]+"\\za\\zdifferall"
        print "\nnow in 'normalize' at line 1308 looking for difference miss_file named: ",diff_file
        success,currVATdict_case=VAT_reader(currVATdict_in,diff_file,1,"csv",1) # only passing two parameters, using default for remainder
        #################################################################
        norm_stats,temp_stats,more_stats=norm_work(success,currVATdict_case,diff_file)
        ################################################################
        summary_filename=diff_file+"_normalize.csv"
        if not os.path.exists(summary_filename) or (bigD["overwrite_summaries"] and current_fix <= norm_stats["basic_info"][0]): # or fixme:
            fileout=open(summary_filename,"w")
            fileout.write("category,value,optionalA,optionalB (all Python indexes are 0-based)\n")
            fileout.write("input_filename = ,"+diff_file+"\n")
            fileout.write("round = ,"+str(norm_stats["basic_info"][0])+"\n")
            fileout.write("subdir = ,"+str(norm_stats["basic_info"][1])+"\n")
            fileout.write("rows in VAT = ,"+str(norm_stats["basic_info"][2])+"\n")
            fileout.write("total pixels = ,"+str(norm_stats["basic_info"][4])+"\n")
            fileout.write("0_position = ,"+str(norm_stats["0_position"][0])+",pixel count = ,"+str(norm_stats["0_position"][2])+"\n")
            fileout.write("max_position = ,"+str(norm_stats["max_count"][0])+",value = ,"+str(norm_stats["max_count"][1])+",pixel count = ,"+\
                          str(norm_stats["max_count"][2])+"\n")
            fileout.write("omit_low = ,"+str(norm_stats["best_case"][0])+"\n")
            fileout.write("omit_high = ,"+str(norm_stats["best_case"][1])+"\n")
            fileout.write("rows used = ,"+str(norm_stats["basic_info"][2]-norm_stats["best_case"][1]-norm_stats["best_case"][0])+"\n")
            fileout.write("weighted sd2v3 test = ,"+str(norm_stats["best_case"][2])+"\n")
            fileout.write("pixels retained = ,"+str(norm_stats["best_case"][3])+"\n")
            fileout.write("% within 1 stdev = ,"+str(norm_stats["best_case"][4])+"\n")
            fileout.write("% within 2 stdev = ,"+str(norm_stats["best_case"][5])+"\n")
            fileout.write("% within 3 stdev = ,"+str(norm_stats["best_case"][6])+"\n")
            fileout.write("% data retained = ,"+str(norm_stats["best_case"][7])+"\n")
            fileout.write("stdev of retained data = ,"+str(temp_stats[(norm_stats["best_case"][0],norm_stats["best_case"][1],"stdev")])+"\n")
            fileout.write("mean of retained data = ,"+str(temp_stats[(norm_stats["best_case"][0],norm_stats["best_case"][1],"part_mean")])+"\n")
            fileout.close()
            bigD["current_fix"]=current_fix+1
        bigD["norm_stats"]=norm_stats
    return bigD #end of 'normalize' function

def norm_work(success,currVATdict,diff_file): #dictionary fed in must be equivalent to what VAT_reader returns for a difference (recovered - real) GRID file
    #guts of the normalization process so it can be used elsewhere
    norm_stats={}
    which_round=bigD["where2start"] #value is 1 too high in all cases except 'za' and 'zz'
    invert=bigD["invert"] #use this to backtrack to actual round in use
    #currVATdict_in={}
    #currVATdict={}
    accumulator={}
    arcmaptable={}
    current_fix=bigD["current_fix"]
    entry_count=0 #compressed index to simplify later analysis
    value_sum=0 #a_key
    count_sum=0 #currVATdict_out
    count_max=0
    max_count_pos=0
    prod_sum=0 #their product
    SS=0
    temp_stats={}
    ends={}
    if success:
        diff_file_key=currVATdict.keys()
        bigD[("x","keys")]=diff_file_key
        bigD[("x","pairs")]=currVATdict
        print "\nLine 1364 in 'norm_work' for file = ",diff_file
        print "length of keys in diff_file =",str(len(diff_file_key))
        for a_key in diff_file_key:
            entry_count+=1
            this_value=a_key
            if a_key>2**31: this_value=int(a_key-(2**32)) # convert 2s complement back to standard negative number
            arcmaptable[this_value]=currVATdict[a_key]
            value_sum+=this_value
            if currVATdict[a_key]>count_max:
                count_max=currVATdict[a_key]
                #print "resetting count_max",str(count_max),str(entry_count)
            count_sum+=currVATdict[a_key]
        right_round=invert[diff_file[-14:-12]]
        print "subdirectory number ",str(right_round),"stored in: ",diff_file[-14:-12] #this is wrong for cleanup #51  
        print "entry_count =",str(entry_count) #OK
        print "value_sum =",str(value_sum) #problem probably was format for negative values
        print "count_sum =",str(count_sum) #OK 
        AMTkeys=arcmaptable.keys()
        AMTkeys.sort()
        half_length=int(0.5*len(AMTkeys))
        good2go=False
        for fill_me in range(-half_length,half_length):
            attempt=half_length+fill_me
            if attempt in AMTkeys:
                good2go=True
                break #possible problem if nothing ever satisfies condition
        if good2go:
            print "line1432 for attempt = ",str(attempt),str(arcmaptable[attempt])
            norm_stats["0_position"]=(-1,attempt,arcmaptable[attempt],99999) #placeholder if none of the rows happen to have an actual 0 value row
        else:
            if int(0.5*len(AMTkeys)) in AMTkeys:
                special_case=AMTkeys[int(0.5*len(AMTkeys))]
                norm_stats["0_position"]=(-1,special_case,arcmaptable[special_case],99999) #alternate placeholder for even stranger situations
            else:
                norm_stats["0_position"]=(-1,-111111,333333,99999)
                norm_stats["max_count"]=(-2,-222222,333335,99999)
        case_count=-1 #will match row number index in ArcMap tables
        print "line 1442"
        for a_key in AMTkeys:
            case_count+=1
            accumulator[(case_count,"value")]=a_key
            accumulator[(case_count,"pixels")]=arcmaptable[a_key]
            accumulator[(case_count,"product")]=a_key*arcmaptable[a_key]
            if a_key==0:
                norm_stats["0_position"]=(case_count,a_key,arcmaptable[a_key],accumulator[(case_count,"product")]) #table indexing uses Python 0-based method
            if arcmaptable[a_key]==count_max:
                norm_stats["max_count"]=(case_count,a_key,count_max,accumulator[(case_count,"product")])
        print "length of data table is:",str(len(arcmaptable)),"which should match",str(len(accumulator)/3.0)
        norm_stats["basic_info"]=(right_round,diff_file[-14:-12],entry_count,value_sum,count_sum) #value of 'which_round' is 1 too high after 'za'
        offset_guess=norm_stats["basic_info"][2]-(norm_stats["0_position"][0]+1)-(norm_stats["max_count"][0]+1)
        print norm_stats["basic_info"]
        print str(norm_stats["basic_info"][2])
        print "0_position",str(norm_stats["0_position"][0]+1)
        print "max_count",str(norm_stats["max_count"][0]+1)
        print str(offset_guess)
        if offset_guess<0: offset_guess=0
        guess_size=30
        part1H,part2H=offset_guess-guess_size,offset_guess+guess_size #+-20 from offset_guess is good range for the inner loop
        usable_limit=int((entry_count-offset_guess)/2)-15
        outer1,outer2=0,usable_limit #252 near max,204,210need to develop a dynamic method to set this range
        print str(outer1),str(outer2),"at line 1465"
        for omitL in range(outer1,outer2): #test value range near likely optimum
            for omitH in range(omitL+part1H,omitL+part2H): #test value range near likely optimum
                presumed_number=entry_count-omitL-omitH
                partcountsum=0
                partprodsum=0
                if presumed_number<6: #6 points needed for +- 1,2,3 stdev
                    print str(omitL),str(omitH)
                else:
                    for rows in range(omitL,entry_count-omitH):
                        if rows>=entry_count:
                            row=entry_count-1
                        else:
                            row=rows
                        partcountsum+=accumulator[(row,"pixels")]
                        partprodsum+=accumulator[(row,"product")]
                    if partcountsum==0: #
                        print "0 at line 1480",str(omitL),str(omitH)
                        partmean=0
                    else:
                        partmean=1.0*partprodsum/partcountsum
                    #print "case =",str(omitL),str(omitH),"gives",str(partcountsum),str(partprodsum),str(partmean)
                temp_stats[(omitL,omitH,"part_mean")]=partmean
                temp_stats[(omitL,omitH,"part_pixels")]=partcountsum
        ##repeat same loops but now already have means to allow the SS calcs to be run
        best_case=(-999,-999,999,0,999,999,888,777,555)
        print "offset_guess=",str(offset_guess),"outer1 and outer2 =",str(outer1),str(outer2)
        for omitL in range(outer1,outer2): #test value range near optimum
            for omitH in range(omitL+part1H,omitL+part2H): #(248,250): #test value range near likely optimum
                presumed_number=entry_count-omitL-omitH
                if presumed_number>=6: #skip cases with fewer than 6 points to use
                    partSS=0
                    for rows in range(omitL,entry_count-omitH):
                        if rows>=entry_count:
                            row=entry_count-1
                        else:
                            row=rows
                        partSS+=(accumulator[(row,"pixels")]*(accumulator[(row,"value")]-temp_stats[(omitL,omitH,"part_mean")])**2)
                    variance=partSS/(temp_stats[(omitL,omitH,"part_pixels")]-1)
                    std_dev=variance**0.5
                    temp_stats[(omitL,omitH,"stdev")]=std_dev
                    #print "for range =",str(omitL),str(omitH),"SS calcs, variance, std dev=",str(partSS),str(variance),str(std_dev)
                    #print "pixels included",str(temp_stats[(omitL,omitH,"part_pixels")])
                    for steps in range(1,4):
                        smallcounts=0
                        ends[("low",steps)]=temp_stats[(omitL,omitH,"part_mean")]-steps*temp_stats[(omitL,omitH,"stdev")]
                        ends[("high",steps)]=temp_stats[(omitL,omitH,"part_mean")]+steps*temp_stats[(omitL,omitH,"stdev")]
                        #print str(int(math.ceil(ends[("low",steps)]))),str(int(math.ceil(ends[("high",steps)])))
                        skip_counts=0
                        for small_steps in range(int(math.ceil(ends[("low",steps)])),1+int(math.ceil(ends[("high",steps)]))):
                            if small_steps in arcmaptable.keys():
                                smallcounts+=arcmaptable[small_steps]
                            else: #may need to translate from sequential counts to values really present
                                pass #for now but could track skips and do a few more
                        temp_stats[(omitL,omitH,steps,"pixels")]=smallcounts
                        temp_stats[(omitL,omitH,steps,"percent")]=100.0*smallcounts/temp_stats[(omitL,omitH,"part_pixels")]
                    temp_stats[(omitL,omitH,"sd2v3")]=2*abs(temp_stats[(omitL,omitH,3,"percent")]-99.7)+\
                                abs(temp_stats[(omitL,omitH,2,"percent")]-95.0) # weights the error at 3 standard deviations twice that at 2
                    #print temp_stats
                    if abs(best_case[2])>(temp_stats[(omitL,omitH,"sd2v3")]): best_case=(omitL,omitH,temp_stats[(omitL,omitH,"sd2v3")],\
                                temp_stats[(omitL,omitH,"part_pixels")],temp_stats[(omitL,omitH,1,"percent")],\
                                temp_stats[(omitL,omitH,2,"percent")],temp_stats[(omitL,omitH,3,"percent")],\
                                100.0*temp_stats[(omitL,omitH,"part_pixels")]/norm_stats["basic_info"][4],norm_stats["basic_info"][2]-\
                                omitL-omitH)
                    #print ends
                else:
                    pass
                    #print "Houston, we have a problem at line 1483. Presumed_number of points is too small at ",str(presumed_number)
                    #several items to be returned have not been set
        print "best_case=",best_case
    else:
        print "failed to properly read VAT file"
        haltnow=1/0
    #adding in simple std deviation calc for full data, no omitted tails of distribution, to handle PC1,2,3 cases in 'alt_normal'
    more_stats={}
    value_sum=0
    count_sum=0
    product_sum=0
    rows2do=len(arcmaptable)
    for a_row in range(rows2do): #arcmaptable dictionary is 0-based
        value_sum+=accumulator[(a_row,"value")]
        count_sum+=accumulator[(a_row,"pixels")]
        product_sum+=accumulator[(a_row,"product")]
    if count_sum==0:
        overall_avg=0
    else:
        overall_avg=(1.0*product_sum)/(1.0*count_sum)
    SS=0
    for a_row in range(rows2do): #is it 0-based or 1-based?
        SS+=accumulator[(a_row,"pixels")]*((accumulator[(a_row,"value")]-overall_avg)**2)
    if count_sum<=1:
        std_dev=0
    else:
        std_dev=(SS/(count_sum-1))**0.5
    more_stats["input_filename"]="X"
    more_stats["number_of_rows"]=rows2do
    more_stats["pixel_count_sum"]=count_sum
    more_stats["product_sum"]=product_sum
    more_stats["raw_mean"]=overall_avg
    more_stats["sums_of_squares"]=SS
    if count_sum<=1:
        more_stats["variance"]=0
    else:
        more_stats["variance"]=(SS/(count_sum-1))
    more_stats["standard_deviation"]=std_dev
    print "subdirectory number ",str(right_round) 
##    print "length of accumulator dictionary is ",str(len(accumulator))    
##    print "entry_count =",str(entry_count) #OK
##    print "value_sum =",str(value_sum) #problem probably was format for negative values
##    print "count_sum =",str(count_sum) #OK
##    print "prod_sum =",str(prod_sum)
##    raw_mean=(1.0*prod_sum)/(1.0*count_sum)
##    print "weight mean value =",str(raw_mean),"for file:",se_file
##    for sequential in range(1,1+entry_count):
##        SS+=accumulator[("count",sequential)]*(float(accumulator[("value",sequential)])-raw_mean)**2
##        #possibly also later on do some version of distribution tail clipping to produce 'closer to truly normal' data
##    variance_val=1.0*SS/(1.0*count_sum)
##    std_dev=variance_val**0.5
##    print "SS =",str(SS)
##    print "variance =",str(variance_val)
    print "standard deviation =",str(std_dev)
    ##################
    norm_stats["good_order"]=accumulator
    norm_stats["temp_stats"]=temp_stats
    norm_stats["best_case"]=best_case    
    ##################################################################################################
    return norm_stats,temp_stats,more_stats #end of 'norm_work' function

def export2excel(bigD):
    print "\nnow running 'export2excel' at line 1527"
    preexisting_subdirs=bigD["good_subdirs"]
    try:
        diff_fil=bigD["calc_files"][("misser","all")]
    except:
        diff_fil=bigD["s_top"]+bigD["s_in"]+"special\\stat"+bigD["d_short"]+"\\zz\\missergall"
    shorty=diff_fil[:-13]
    stat=diff_fil[-26:-14] # could also obtain as a split on "\"
    image=diff_fil[-43:-35]# could also obtain as a split on "\"
    storage={} #now open up the .csv values, store the entries, reformat a single file output
    if len(preexisting_subdirs)<52:
        num2do=len(preexisting_subdirs)+1
    else:
        num2do=52
    for subnum in range(1,num2do): #skip subnum=0 which is the 'xx' subdirectory
        subdir = preexisting_subdirs[subnum]
        if subnum==51:
            target=shorty+subdir+"\\missergall"+"1"
        else:
            target=shorty+subdir+"\\"+subdir[:1]+"misserall"+"1"# aim at zmisserall1_normalize or summary
        print target
        diff_sum=target+"_summary.csv"
        normalize=target+"_normalize.csv"
        diff_file=open(diff_sum,"r")
        norm_file=open(normalize,"r")
        diff_list=diff_file.readlines()
        norm_list=norm_file.readlines()
        diff_count=-1
        for things in diff_list:
            diff_count+=1
            if things[-1:]=="\n": things=things[:-1]#remove trailing newline character
            storage[("miss",subnum,diff_count,0)],storage[("miss",subnum,diff_count,1)]=things.split(",") #only 2 items in each line
        diff_file.close()                                                                                             
        norm_count=-1
        for things in norm_list:
            norm_count+=1
            if things[-1:]=="\n": things=things[:-1]#remove trailing newline character
            for item_num in range(len(things.split(","))):
                storage[("norm",subnum,norm_count,item_num)]=things.split(",")[item_num] #variance number of items per line
        norm_file.close()
    print "cases found = ",len(storage)
    #print storage
    bigD["storage"]=storage #need to debug the behavior at the final 'zz' round
    full_summary=shorty+"\\zz\\full_summary"+image+stat+".csv"
    print "now opening combined results output file: ",full_summary
    fileout=open(full_summary,"w")
    header_txt="Qmisserall filename,original image,gap defect image,\n"
    fileout.write(header_txt)
    try:
        stuff2write=storage[("norm",51,1,1)]+","+image+","+stat+"\n"
        print "line 1577"
    except: #want to fix this eventually so the try/except handling isn't needed
        stuff2write=diff_fil+","+image+","+stat+"\n"
        print "line 1580"
    fileout.write(stuff2write)
    header_txt="round,subdir,rows in VAT,total pixels,raw mean,raw stdev,location 0,pixels 0,location max,value max,pixels max,omit low,\
omit high,rows used,weighted sc2v3,pixels retained,% within 1 sd,% within 2 sd,% within 3 sd,% pixels retained,stdev retained,mean retained\n"
    fileout.write(header_txt)
    for sub_count in range(1,52):
        stuff2write=storage[("norm",sub_count,2,1)]+","+storage[("norm",sub_count,3,1)]+","+storage[("norm",sub_count,4,1)]+","
        stuff2write+=storage[("norm",sub_count,5,1)]+","+storage[("miss",sub_count,5,1)]+","+storage[("miss",sub_count,8,1)]+","
        stuff2write+=storage[("norm",sub_count,6,1)]+","+storage[("norm",sub_count,6,3)]+","+storage[("norm",sub_count,7,1)]+","
        stuff2write+=storage[("norm",sub_count,7,3)]+","+storage[("norm",sub_count,7,5)]+","+storage[("norm",sub_count,8,1)]+","
        stuff2write+=storage[("norm",sub_count,9,1)]+","+storage[("norm",sub_count,10,1)]+","+storage[("norm",sub_count,11,1)]+","
        stuff2write+=storage[("norm",sub_count,12,1)]+","+storage[("norm",sub_count,13,1)]+","+storage[("norm",sub_count,14,1)]+","
        stuff2write+=storage[("norm",sub_count,15,1)]+","+storage[("norm",sub_count,16,1)]+","+storage[("norm",sub_count,17,1)]+","
        stuff2write+=storage[("norm",sub_count,18,1)]+"\n"
        fileout.write(stuff2write)
    fileout.close()
    ##success,currVATdict=VAT_reader(currVATdict_in,diff_file,1,"csv",1) # only passing two parameters, using default for remainder
    return #end of 'export2excel' function

def do_pc2and3(bidD):
    print "\nnow running 'do_pc2and3' at line 1600"
    preexisting_subdirs=bigD["good_subdirs"]
    source_top=bigD["s_top"]
    source_in=bigD["s_in"]
    stat_dir=bigD["stat_dir"]
    #fixme=bigD["fixme"]
    overwrite_data=bigD["overwrite_data"]
    upper_files=bigD[("source","upper_files")]
    calc_files={}
    print source_top+source_in+"special\\"+stat_dir+"\\xx\\"
    f10=source_top+source_in+"special\\"+stat_dir+"\\zz\\"+"methodcomp"
    #problem upstream in the we didn't bother to create pc bands 2 and 3 in the 'xx' subdirectory
    for band in ("2","3"):
        f1=source_top+source_in+"special\\"+upper_files[(3,band,"no_more")]# need to fixup
        print f1
        print os.path.exists(f1)
        f4=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc"+band+"cutmi4" #lower_files[0,75][2] #32 from 74,75
        print f4
        print os.path.exists(f4)
        f5=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc"+band+"cutmi8" #lower_files[0,72][2] #64 from 76,72
        print f5
        print os.path.exists(f5)
        f6=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc"+band+"cutmi16" #lower_files[0,73][2] #8 from 72,73
        print f6
        print os.path.exists(f6)
        f7=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc"+band+"cutmi32" #lower_files[0,74][2] #16 from 73,74
        print f7
        print os.path.exists(f7)
        f8=source_top+source_in+"special\\"+stat_dir+"\\xx\\"+"pc"+band+"cutmi64" #lower_files[0,76][2] #4 from 75,76
        print f8
        print os.path.exists(f8)
        f9=source_top+source_in+"special\\"+stat_dir+"\\zz\\"+"pixpc"+band+"ge4all" #will hold pc2 or pc3 final fixed version
        print "now creating ",f9    
        if not os.path.exists(f9) or overwrite_data: # or fixme:
            raster_algebra = Con(Raster(f10)==4,Raster(f4),Con(Raster(f10)==8,Raster(f5),Con(Raster(f10)==16,Raster(f6),Con(Raster(f10)==32,\
                            Raster(f7),Con(Raster(f10)==64,Raster(f8))))))
            raster_algebra.save(f9) #for some reason differs from equivalent statement in ArcMap RasterCalc
        rast_calc_name5=source_top+source_in+"special\\"+stat_dir+"\\zz\\"+"missergall"+band #RENAME TO differgall1
        print "now creating ",rast_calc_name5
        calc_files[("misser","all",band)]=rast_calc_name5
        if not os.path.exists(rast_calc_name5) or overwrite_data: # or fixme: #eliminated most of the other "fixme"
            raster_algebra = Raster(f1) - Raster(f9)
            raster_algebra.save(rast_calc_name5)
    #next few lines create complex summary files for missing1,2,3 rasters - these need to be loaded using VAT_reader and summarized
    f11=rast_calc_name5[:-11]+"methodnum" #get 'methodnum' raster to allow creation of all summaries       
    for band in ("1","2","3"):
        rast_calc_name6=rast_calc_name5[:-11]+"methnumxmiss"+band #filename holding complex summary values
        f12=rast_calc_name5[:-1]+band
        calc_files[("complex",band)]=rast_calc_name6
        if not os.path.exists(rast_calc_name6) or overwrite_data:
            raster_algebra=Int((Raster(f12) + 500) + 1000*Raster(f11))# works if data range is -499 to +499 in difference of real and recovered
            raster_algebra.save(rast_calc_name6)
    bigD["calc_files"]=calc_files        
    return #end of 'do_pc2and3' function

def alt_normal(bigD):
    #this creates summaries and normalizations for the complex rasters representing pc2 and pc3, along with confirming data for pc1
    print "\nNow running 'alt_normal' at line 1657"# need to change to handle pc2 and pc3 instead of just single pc1
    currVATdict_in={}
    manyVATdict={}#will be a dictionary of dictionaries of dictionaries
    manyVATcount={}
    current_fix=bigD["current_fix"]
    preexisting_subdirs=bigD["good_subdirs"]
    source_top=bigD["s_top"]
    source_in=bigD["s_in"]
    stat_dir=bigD["stat_dir"]
    overwrite_data=bigD["overwrite_data"]
    for band in ("1","2","3"): #pc1 is recheck comparing methods, pc2 and pc3 are new data to test
        middleVATdict={}#one dictionary for each band
        middleVATcount={}
        alt_file=source_top+source_in+"special\\"+stat_dir+"\\zz\\"+"methnumxmiss"+band
        print "\nnow calling complex raster in 'alt normal' at line 1671 for file: ",alt_file
        success,currVATdict_case=VAT_reader(currVATdict_in,alt_file,1,"csv",1) # only passing two parameters, using default for remainder
        if success:
            for cycles in range(1,52): #52
                these_keys=currVATdict_case.keys()
                innerVATdict={}
                local_count=0
                find_count=0
                this_cycle=cycles
                if cycles==51: this_cycle=99#last end had ID of 99 instead of 51
                tcycle=str(this_cycle)# create both numeric and text string version
                if cycles<10: tcycle="0"+tcycle #pad tcycle so diff_file later on has same exact length for all cycles
                for a_key in these_keys:
                    local_count+=1
                    a_cycle=int(a_key/1000)
                    a_value=(a_key-(1000*a_cycle))-500 # turns complex case back to original value for given cycle
                    if a_cycle==this_cycle:
                        find_count+=1
                        innerVATdict[a_value]=currVATdict_case[a_key]
                print "for cycle number ",tcycle," there were ",str(find_count)," real rows out of a possible total of ",str(local_count)
                middleVATdict[this_cycle]=innerVATdict
                middleVATcount[this_cycle]=find_count
                diff_file=alt_file[:-13]+"pc"+band+preexisting_subdirs[cycles]+"missvalcyc"+tcycle # not sure if this name works well inside of 'norm_work' function
                norm_stats,temp_stats,more_stats=norm_work(success,innerVATdict,diff_file)#call to 'norm_work' function
                #need to write out the contents of normstats and temp_stats
                summary_filename=diff_file+"_normalize.csv"
                if not os.path.exists(summary_filename) or (bigD["overwrite_summaries"] and current_fix <= norm_stats["basic_info"][0]): # or fixme:
                    fileout=open(summary_filename,"w")
                    fileout.write("category,value,optionalA,optionalB (all Python indexes are 0-based)\n")
                    fileout.write("input_filename = ,"+diff_file+"\n")
                    fileout.write("round = ,"+str(norm_stats["basic_info"][0])+"\n")
                    fileout.write("subdir = ,"+str(norm_stats["basic_info"][1])+"\n")
                    fileout.write("rows in VAT = ,"+str(norm_stats["basic_info"][2])+"\n")
                    fileout.write("total pixels = ,"+str(norm_stats["basic_info"][4])+"\n")
                    fileout.write("0_position = ,"+str(norm_stats["0_position"][0])+",pixel count = ,"+str(norm_stats["0_position"][2])+"\n")
                    fileout.write("max_position = ,"+str(norm_stats["max_count"][0])+",value = ,"+str(norm_stats["max_count"][1])+",pixel count = ,"+\
                                  str(norm_stats["max_count"][2])+"\n")
                    fileout.write("omit_low = ,"+str(norm_stats["best_case"][0])+"\n")
                    fileout.write("omit_high = ,"+str(norm_stats["best_case"][1])+"\n")
                    fileout.write("rows used = ,"+str(norm_stats["basic_info"][2]-norm_stats["best_case"][1]-norm_stats["best_case"][0])+"\n")
                    fileout.write("weighted sd2v3 test = ,"+str(norm_stats["best_case"][2])+"\n")
                    fileout.write("pixels retained = ,"+str(norm_stats["best_case"][3])+"\n")
                    fileout.write("% within 1 stdev = ,"+str(norm_stats["best_case"][4])+"\n")
                    fileout.write("% within 2 stdev = ,"+str(norm_stats["best_case"][5])+"\n")   
                    fileout.write("% within 3 stdev = ,"+str(norm_stats["best_case"][6])+"\n")
                    fileout.write("% data retained = ,"+str(norm_stats["best_case"][7])+"\n")
                    if (norm_stats["best_case"][0],norm_stats["best_case"][1],"stdev") in temp_stats.keys():
                        fileout.write("stdev of retained data = ,"+str(temp_stats[(norm_stats["best_case"][0],norm_stats["best_case"][1],"stdev")])+"\n")
                    else:
                        fileout.write("stdev of retained data = , 9999.999\n") #dealing with cases lacking enough points in alt_norm method
                    if (norm_stats["best_case"][0],norm_stats["best_case"][1],"part_mean") in temp_stats.keys():
                        fileout.write("mean of retained data = ,"+str(temp_stats[(norm_stats["best_case"][0],norm_stats["best_case"][1],"part_mean")])+"\n")
                    else:
                        fileout.write("mean of retained data = , 8888.888\n")
                    fileout.close()
                    bigD["current_fix"]=current_fix+1
                bigD["norm_stats"]=norm_stats
                #now creating what was done by std_stats in the main process
                simpler_summary_name=diff_file+"_summary.csv"
                if not os.path.exists(simpler_summary_name) or bigD["overwrite_summaries"]:
                    fileout2=open(simpler_summary_name,"w")
                    fileout2.write("source,value\n")
                    fileout2.write("input_filename = ,"+alt_file+".."+tcycle+"\n")
                    fileout2.write("number of rows = ,"+str(more_stats["number_of_rows"])+"\n")
                    fileout2.write("pixel count sum = ,"+str(more_stats["pixel_count_sum"])+"\n")
                    fileout2.write("weighted valueXcount sum = ,"+str(more_stats["product_sum"])+"\n")
                    fileout2.write("raw_mean = ,"+str(more_stats["raw_mean"])+"\n")
                    fileout2.write("sums of squares = ,"+str(more_stats["sums_of_squares"])+"\n")
                    fileout2.write("variance = ,"+str(more_stats["variance"])+"\n")
                    fileout2.write("standard deviation = ,"+str(more_stats["standard_deviation"])+"\n")
                    fileout2.close()
            manyVATdict[band]=middleVATdict
            manyVATcount[band]=middleVATcount
        
    return bigD #end of 'alt_normal' function

def alt_export2excel(bigD):
    print "\nNow running 'alt_export2excel' at line 1742"
    preexisting_subdirs=bigD["good_subdirs"]
    diff_fil=bigD["s_top"]+bigD["s_in"]+"special\\stat"+bigD["d_short"]+"\\zz\\"
    stat=diff_fil[-16:-4] # could also obtain as a split on "\"
    image=diff_fil[-33:-25]# could also obtain as a split on "\"
    storage={} #now open up the .csv values, store the entries, reformat a single file output
    num2do=52
    for bands in ("1","2","3"):
        #DO ALL THREE PRINCIPAL COMPONENTS
        for subnum in range(1,num2do): #skip subnum=0 which is the 'xx' subdirectory
            subtext=str(subnum)
            if subnum<10: subtext="0"+subtext
            if subnum==51: subtext="99"
            subdir = preexisting_subdirs[subnum]
            target=diff_fil+"pc"+bands+subdir+"missvalcyc"+subtext#cycle number is at the end
            print target
            diff_sum=target+"_summary.csv" #have not yet created this for special PC2 and PC3 cases
            normalize=target+"_normalize.csv"
            diff_file=open(diff_sum,"r")
            norm_file=open(normalize,"r")
            diff_list=diff_file.readlines()
            norm_list=norm_file.readlines()
            diff_count=-1
            for things in diff_list:
                diff_count+=1
                if things[-1:]=="\n": things=things[:-1]#remove trailing newline character
                storage[("miss",subnum,diff_count,0)],storage[("miss",subnum,diff_count,1)]=things.split(",") #only 2 items in each line
            diff_file.close()
            norm_count=-1
            for things in norm_list:
                norm_count+=1
                if things[-1:]=="\n": things=things[:-1]#remove trailing newline character
                for item_num in range(len(things.split(","))):
                    storage[("norm",subnum,norm_count,item_num)]=things.split(",")[item_num] #variance number of items per line
            norm_file.close()
        print "cases found = ",len(storage)
        #print storage
        bigD["storage"]=storage #need to debug the behavior at the final 'zz' round
        full_summary=diff_fil+"PC"+bands+"_full_normalized_summary"+image+stat+".csv"
        print "now opening combined results output file: ",full_summary
        fileout=open(full_summary,"w")
        header_txt="methnumxmiss filename,original image,gap defect image,\n"
        fileout.write(header_txt)
        try:
            stuff2write=storage[("norm",51,1,1)]+","+image+","+stat+"\n"
            print "line 1787"
        except: #want to fix this eventually so the try/except handling isn't needed
            stuff2write=diff_fil+","+image+","+stat+"\n"
            print "line 1790"
        fileout.write(stuff2write)
        header_txt="round,subdir,rows in VAT,total pixels,raw mean,raw stdev,location 0,pixels 0,location max,value max,pixels max,omit low,\
omit high,rows used,weighted sc2v3,pixels retained,% within 1 sd,% within 2 sd,% within 3 sd,% pixels retained,stdev retained,mean retained\n"
        fileout.write(header_txt)
        for sub_count in range(1,52):
            stuff2write=storage[("norm",sub_count,2,1)]+","+storage[("norm",sub_count,3,1)]+","+storage[("norm",sub_count,4,1)]+","
            stuff2write+=storage[("norm",sub_count,5,1)]+","+storage[("miss",sub_count,5,1)]+","+storage[("miss",sub_count,8,1)]+","
            stuff2write+=storage[("norm",sub_count,6,1)]+","+storage[("norm",sub_count,6,3)]+","+storage[("norm",sub_count,7,1)]+","
            stuff2write+=storage[("norm",sub_count,7,3)]+","+storage[("norm",sub_count,7,5)]+","+storage[("norm",sub_count,8,1)]+","
            stuff2write+=storage[("norm",sub_count,9,1)]+","+storage[("norm",sub_count,10,1)]+","+storage[("norm",sub_count,11,1)]+","
            stuff2write+=storage[("norm",sub_count,12,1)]+","+storage[("norm",sub_count,13,1)]+","+storage[("norm",sub_count,14,1)]+","
            stuff2write+=storage[("norm",sub_count,15,1)]+","+storage[("norm",sub_count,16,1)]+","+storage[("norm",sub_count,17,1)]+","
            stuff2write+=storage[("norm",sub_count,18,1)]+"\n"
            fileout.write(stuff2write)
        fileout.close()
        ##success,currVATdict=VAT_reader(currVATdict_in,diff_file,1,"csv",1) # only passing two parameters, using default for remainder
    return #end of 'alt_export2excel' function

def need2create1st(bigD):
    """Create the first-stage rasters for the clean source image when they are missing.

    Reads from bigD: "overwrite_data", "s_top", "s_in" and "path_row".
    Writes to bigD: ("source","satellite") (the result of date_check) and
    "calc_files" (dictionary of the output raster paths built below).

    Everything is written to the folder <s_top><map_dir>special, which is
    created if absent. Each raster is only computed when its path does not
    exist or overwrite_data is truthy. Using the example map_dir "20050526\\"
    from the comments below, the outputs are:
      m050526test          count of leading bands (B1,B2,...) whose value is > 0
      b<N>z20050526        band N kept only where the count equals topnum
      pc050526 / pd050526  principal components ("pd" when satellite is "L8")
      pc050526c<N>         "L8" only: Int(pd050526c<N>/64) reduced-range copy
      p050526c<B>mn<K>, p050526c<B>sd<K>   block MEAN / STD of PC band B (1-3)
      p050526c<B>mi<K>, ...si<K>, ...se<K> Int(mean), Int(stdev), Int(100*si/mi)
    with K in (4,8,16,32,64).

    arcpy.env.cellSize is saved on entry and put back just before returning
    (only on the normal path; there is no try/finally).

    Returns 0 when none of the counted rasters had to be created, otherwise -1.
    The "L8" range-reduction rasters do not increment the counter.
    """
    #will handle initial creation of principal components and various block averaging calcs
    print "\nNow running 'need2create1st' at line 2015 with full functionality"
    overwrite_data=bigD["overwrite_data"]
    path2map=bigD["s_top"] #such as "F:\\LaCie2TB\\Landsat\\046x029\\" 
    map_dir=bigD["s_in"] #such as "20050526\\"
    if len(map_dir)>9: map_dir=bigD["s_in"][-9:] #handle special case of added directory levels when clipping Wenzhou
    previous_cell_size=arcpy.env.cellSize #remembered so it can be restored at the end
    cases_done=0 #number of rasters newly created during this call
    long_new_topdir=path2map+map_dir+"special"
    if not os.path.exists(long_new_topdir):
        print "\nCreating new subdirectory "+long_new_topdir+" which is top level storage folder "
        os.mkdir(long_new_topdir)
    else: print "storage directory ",long_new_topdir," already exists, no need to create it. Will look for unfinished calcs within it."
    image_bands = {} #band index (such as "B1") -> file name found in the image directory
    calc_files = {} #logical name -> full path of each calculated raster
    possible_date=map_dir[:8]
    path,row=bigD["path_row"]
    # NOTE(review): date_check is called twice with the same arguments; sat_type and
    # sat_status are not used again in this function - confirm the first call is needed
    sat_type,sat_status=date_check(path,row,possible_date)
    bigD["source","satellite"]=date_check(path,row,possible_date)#
    print "bigD[source,satellite] = ",bigD["source","satellite"][0]
    a_list=os.listdir(path2map+map_dir)
    ####a_list.sort()
    print "This is file and subdirectory list for ",path2map+map_dir
    excesses=0 #deal with numeric versus string sorting
    #derive a band index from each file name by slicing fixed positions counted from the end of the name
    for anything in a_list:
        if anything[-4:]==".TIF" or anything[-4:]==".tif": # may need to handle other types of files
            if anything[-6:-5]=="B": #"B" + 1 character before the extension, index such as "B1"
                index=anything[-6:-4]
            elif anything[-7:-6]=="B": #"B" + 2 characters before the extension, index such as "B10"
                index=anything[-7:-4]
            elif anything[-13:-12]=="B": #"B" + 8 characters before the extension (presumably names like B6_VCID_1 - confirm)
                index=anything[-13:-4]
            elif anything[-12:-5]=="sr_band": #level 2 naming convention
                index="B"+anything[-5:-4]
            elif anything[-13:-6]=="sr_band": #level 2 naming convention for cases longer than just one digit
                index="B"+anything[-6:-4]
                excesses+=1 #counts the two-digit sr_band files; drives the renumbering further down
            else:
                index=anything[:-4] #no recognised pattern: whole name without extension is the key
##            elif anything[-7:-6]=="B":
##                if anything[-5:-4]=="0": 
##                    index=anything[-7:-5]
##                elif anything[-5:-4]=="1" or anything[-5:-4]=="2": #handles B61 and B62 names differently than 3 lines down
##                    index=anything[-7:-4]
##            elif anything[-13:-12]=="B":
##                index=anything[-13:-4]
##            else:
##                index=anything[:-4]    
            image_bands[index]=anything # putting TIF files into a dictionary to return
        if anything[-10:-8]=="wc": #handling GRID files renamed to start with "wc" - code added Feb. 13, 2018
            if anything[-2:-1]=="b" or anything[-2:-1]=="B":
                index="B"+anything[-1:]
            else:
                index=anything[:]
            image_bands[index]=anything # putting TIF files into a dictionary to return
    f={} #"B<number>" -> full path of the source band file used in the raster algebra below
    key_list=image_bands.keys()
    key_list.sort() #string sort, so a key such as "B10" sorts ahead of "B2"
    do_not_use="" #concatenated keys of skipped files; not read again in this function
    print "excesses = ",str(excesses)
    band_counter=0
    for image_keys in key_list: #useful bands: 7 for L5, 9 for L7, 10 for L8
        if image_bands[image_keys][-7:]=="BQA.TIF" or image_bands[image_keys][-6:]=="qa.tif" or image_bands[image_keys][-11:]=="aerosol.tif":
            do_not_use+=image_keys  #skip the QA band for PC analysis
        else:
            band_counter+=1
            storage="B"+str(band_counter)#works if fewer than 10 bands present and/or still at the first band in other cases
            # The corrections below map the position in the sorted key list back to a band number:
            # with one two-digit band, position 2 becomes B10 and later positions shift down by 1;
            # with two, positions 2 and 3 become B10 and B11 and later positions shift down by 2.
            # NOTE(review): excesses only counts "sr_band" names, so two-digit bands found through
            # the "B"-prefixed patterns above get no correction here - confirm that is intended
            if excesses==1:
                if band_counter==2:
                    storage="B"+str(10)
                elif band_counter>2:
                    storage="B"+str(band_counter-1)
            elif excesses==2:
                if band_counter==2 or band_counter==3:
                    storage="B"+str(8+band_counter)
                elif band_counter>3:
                    storage="B"+str(band_counter-2)
                
            print image_keys,"was the index key for:",image_bands[image_keys]+" stored as f["+storage+"]"
            f[storage]=path2map+map_dir+image_bands[image_keys]
    rast_calc_name=long_new_topdir+"\\m"+map_dir[2:-1]+"test" #usable pixels only
    calc_files["mYYMMDDtest"]=rast_calc_name
    if not os.path.exists(rast_calc_name) or overwrite_data:
        print "now creating file:",rast_calc_name
        # Each nested Con yields the number of leading bands (B1, B2, ...) that are > 0 in a cell,
        # stopping at the first band that is not; only cells with every band > 0 reach the top value
        if bigD["source","satellite"][0]=="L5":
            raster_algebra = Con(Raster(f["B1"]) > 0,Con(Raster(f["B2"]) > 0,Con(Raster(f["B3"]) > 0,Con(Raster(f["B4"]) > 0,Con(Raster(f["B5"]) > 0,\
            Con(Raster(f["B6"]) > 0,Con(Raster(f["B7"]) > 0,7,6),5),4),3),2),1),0)
        elif bigD["source","satellite"][0]=="L7":
            raster_algebra = Con(Raster(f["B1"]) > 0,Con(Raster(f["B2"]) > 0,Con(Raster(f["B3"]) > 0,Con(Raster(f["B4"]) > 0,Con(Raster(f["B5"]) > 0,\
            Con(Raster(f["B6"]) > 0,Con(Raster(f["B7"]) > 0,Con(Raster(f["B8"]) > 0,Con(Raster(f["B9"]) > 0,9,8),7),6),5),4),3),2),1),0)
        elif bigD["source","satellite"][0]=="L8" and band_counter==10: # choosing not to use band 9 from level 1 in new rasters
            raster_algebra = Con(Raster(f["B1"]) > 0,Con(Raster(f["B2"]) > 0,Con(Raster(f["B3"]) > 0,Con(Raster(f["B4"]) > 0,Con(Raster(f["B5"]) > 0,\
            Con(Raster(f["B6"]) > 0,Con(Raster(f["B7"]) > 0,Con(Raster(f["B8"]) > 0,Con(Raster(f["B9"]) > 0,Con(Raster(f["B10"]) > 0,\
            10,9),8),7),6),5),4),3),2),1),0)
        else: #more bands for Landsat8 data
            raster_algebra = Con(Raster(f["B1"]) > 0,Con(Raster(f["B2"]) > 0,Con(Raster(f["B3"]) > 0,Con(Raster(f["B4"]) > 0,Con(Raster(f["B5"]) > 0,\
            Con(Raster(f["B6"]) > 0,Con(Raster(f["B7"]) > 0,Con(Raster(f["B8"]) > 0,Con(Raster(f["B9"]) > 0,Con(Raster(f["B10"]) > 0,\
            Con(Raster(f["B11"]) > 0,11,10),9),8),7),6),5),4),3),2),1),0)
        raster_algebra.save(rast_calc_name)
        cases_done+=1
    #choose the band numbers to process and topnum, the mask value meaning "every band has data"
    if bigD["source","satellite"][0]=="L5":
        todolist = (1,2,3,4,5,6,7)
        topnum=7
    elif bigD["source","satellite"][0]=="L7":
        todolist = (1,2,3,4,5,6,7,8,9)
        topnum=9
    elif band_counter==10 and bigD["source","satellite"][0]=="L8": #might be Landsat8 skipping the high atmosphere band9
        todolist = (1,2,3,4,5,6,7,8,9,10) #band9 of Landsat8 imagery was mostly hardware artifact in some P/R image locations
        topnum=10
    else: #must be regular Landsat8
        todolist = (1,2,3,4,5,6,7,8,9,10,11) #band9 of Landsat8 imagery was mostly hardware artifact in some P/R image locations
        topnum=11    
    #both branches currently assign the same value; the L8 branch once used topnum-1 (see its comment)
    if bigD["source","satellite"][0]=="L8":
        pc_max_band=topnum # removing feature topnum-1
    else:
        pc_max_band=topnum    
    for band_num in todolist: #range(1,8):
        rast_calc_name=long_new_topdir+"\\b"+str(band_num)+"z"+map_dir[:-1]
        calc_files["s_b"+str(band_num)]=rast_calc_name
        if not os.path.exists(rast_calc_name) or overwrite_data:
            print "now creating file:",rast_calc_name  ##inner="B"+str(band_num)
            #keep the band value only where the mask equals topnum and the band itself is > 0; no false value is supplied to Con
            raster_algebra = Con(Raster(calc_files["mYYMMDDtest"]) == topnum,Con(Raster(f["B"+str(band_num)]) > 0,Raster(f["B"+str(band_num)])))
            raster_algebra.save(rast_calc_name)
            cases_done+=1
    print calc_files
    outPCfile=long_new_topdir+"\\princ_comp_"+map_dir[:-1]+".TXT" #passed as the third argument of PrincipalComponents below
    if bigD["source","satellite"][0]!="L8":
        outPC=long_new_topdir+"\\pc"+map_dir[2:-1]
    else:
        outPC=long_new_topdir+"\\pd"+map_dir[2:-1]#alternate name for principal components of Landsat8 allows reduced depth version to have traditional name
    if not os.path.exists(outPC) or overwrite_data:
        load_files=[]
        for band_num in range(1,topnum+1):
            ##if not (band_num==9 and bigD["source","satellite"][0]=="L8"): #skip band9 of Landsat8 data
            load_files.append(calc_files["s_b"+str(band_num)])
##        if bigD["source","satellite"][0]=="L8":
##            for band_num in (8,10,11):
##                load_files.append(calc_files["s_b"+str(band_num)])
        #still need to consider capturing more L7 bands
##        if bigD["source","satellite"][0]=="L8":
##            pc_max_band=topnum-1
##        else:
##            pc_max_band=topnum
        print "now building principal component value rasters in: ",outPC
        outPrincipalComp = PrincipalComponents(load_files,pc_max_band,outPCfile)
        outPrincipalComp.save(outPC)
        cases_done+=1
    #need to reduce pixel depth if Landsat8 raster data was used as image source - added April 4, 2018
    if bigD["source","satellite"][0]=="L8":
        newfile_target=long_new_topdir+"\\pc"+map_dir[2:-1]+"c"
        oldfile_target=long_new_topdir+"\\pd"+map_dir[2:-1]+"c"
        #these rescaled rasters are not added to cases_done, so they do not affect the return value
        for band_num in range(1,pc_max_band+1):
            if not os.path.exists(newfile_target+str(band_num)) or overwrite_data:                 
            ##for band_num in range(1,pc_max_band+1):
                raster_algebra = Int(1.0*Raster(oldfile_target+str(band_num))/64) #rescale data range in principal components (drops to around 1000 points) first try was /128
                raster_algebra.save(newfile_target+str(band_num)) # gives the "pc" version name to the reduced range data
                #no worry anymore, all PCs to use now have common name
    arcpy.env.cellSize = 15 #size for all mn and sd - other rasters (mi,si,se) vary with compress level
    for bands in range(1,4):
        # NOTE(review): for "L8" outPC is the "pd" name, so this reads the pd...c<N> bands rather
        # than the reduced-range pc...c<N> rasters written just above - confirm which is intended
        inRaster=outPC+"c"+str(bands)#general principal component bands to run block statistics on
        #arcpy.env.cellSize = 15 #size for all mn and sd - other rasters (mi,si,se) vary with compress level
        for compress in (4,8,16,32,64):
            nbr = NbrRectangle(compress,compress,"CELL") #CELL versus MAP units
            rast_calc_name=long_new_topdir+"\\p"+map_dir[2:-1]+"c"+str(bands)+"mn"+str(compress) #creates floating point mean blocks
            calc_files[("mean",bands,compress)]=rast_calc_name #store name for later reuse
            if not os.path.exists(rast_calc_name) or overwrite_data:
                raster_algebra=BlockStatistics(inRaster, nbr, "MEAN", "DATA")
                raster_algebra.save(rast_calc_name)
                cases_done+=1
            rast_calc_name=long_new_topdir+"\\p"+map_dir[2:-1]+"c"+str(bands)+"sd"+str(compress) #creates floating point stdev blocks
            calc_files[("stdev",bands,compress)]=rast_calc_name #store name for later reuse
            if not os.path.exists(rast_calc_name) or overwrite_data:
                raster_algebra=BlockStatistics(inRaster, nbr, "STD", "DATA")
                raster_algebra.save(rast_calc_name)
                cases_done+=1
    #second pass: integer versions, written with the environment cell size enlarged to one cell per block
    for compress in (4,8,16,32,64):
        arcpy.env.cellSize = compress*15 #varies in proportion to compress level for mi,si,se
        for bands in range(1,4):
            rast_calc_name=long_new_topdir+"\\p"+map_dir[2:-1]+"c"+str(bands)+"mi"+str(compress) #creates integer mean blocks
            calc_files[("mean_int",bands,compress)]=rast_calc_name #store name for later reuse
            if not os.path.exists(rast_calc_name) or overwrite_data:
                raster_algebra=Int(Raster(calc_files[("mean",bands,compress)]))
                raster_algebra.save(rast_calc_name)
                cases_done+=1
            rast_calc_name=long_new_topdir+"\\p"+map_dir[2:-1]+"c"+str(bands)+"si"+str(compress) #creates integer stdev blocks
            calc_files[("stdev_int",bands,compress)]=rast_calc_name #store name for later reuse
            if not os.path.exists(rast_calc_name) or overwrite_data:
                raster_algebra=Int(Raster(calc_files[("stdev",bands,compress)]))
                raster_algebra.save(rast_calc_name)
                cases_done+=1
            rast_calc_name=long_new_topdir+"\\p"+map_dir[2:-1]+"c"+str(bands)+"se"+str(compress) #creates integer standard error blocks
            calc_files[("se",bands,compress)]=rast_calc_name #store name for later reuse
            if not os.path.exists(rast_calc_name) or overwrite_data:
                #computed as 100 * integer stdev / integer mean, truncated to integer
                raster_algebra=Int(100.0*Raster(calc_files[("stdev_int",bands,compress)])/Raster(calc_files[("mean_int",bands,compress)]))
                raster_algebra.save(rast_calc_name)
                cases_done+=1
    bigD["calc_files"]=calc_files
    arcpy.env.cellSize=previous_cell_size #restore original environment cellSize
    #the main loop adds this return value to its 'simple' counter: 0 = nothing new, -1 = something was built
    if cases_done==0:
        return 0
    else:
        print "a total of ",str(cases_done)," rasters were newly created"
        return -1 #end of 'need2create1st' function
    
def canskip2real(bigD): #tests for whether data exist to jump right to calculation of truly missing entries
    print "now at line 2169 in 'canskip2real'"
    bigD=get_targets(bigD,"defect") #line 142
    show_connections(bigD[("defect","lower_files")]) #line 461 #(defect_lower_files)
    show_connections(bigD[("defect","upper_files")]) #line 461 #(defect_upper_files)
    bigD=get_targets(bigD,"source")
    show_connections(bigD[("source","lower_files")]) #(source_lower_files)
    show_connections(bigD[("source","upper_files")]) #(source_upper_files)
    #bigD[("defect","image_bands")] names of files needing to be fixed
    #bigD[("source","image_bands")]
    #bigD[("source","lower_files")][51,9] methodnum
    #bigD[("source","lower_files")][51,7] methodcomp
    #testing for presence of necessary files
    methodcomp_file=bigD["s_top"]+bigD["s_in"]+"special\\stat"+bigD["d_in"]+"zz\\methodcomp"
    methodnum_file=bigD["s_top"]+bigD["s_in"]+"special\\stat"+bigD["d_in"]+"zz\\methodnum"
    if os.path.exists(methodcomp_file) and os.path.exists(methodnum_file): #compression cycle number file #compression rule file
        print "preexisting methodcomp and methodnum files found - ready to try repairing missing data"
        print "these are the files needing to have data repaired in their gaps"
        print bigD[("defect","image_bands")]
        allgood = True
    else:
        print "problems at line 2191. File(s) not found for ",methodcomp_file,methodnum_file
        allgood = False
    return allgood,methodcomp_file,methodnum_file #end of 'canskip2real' functions

def repair_targets(bigD,methodcomp,methodnum):
    calc_files={}
    overwrite_data=bigD["overwrite_data"]
    print "now at line 2182 in 'repair_targets'"
    #first check on pre-existence of repair target subdirectory
    improved=bigD["d_top"]+bigD["d_in"]+"improved\\"
    if not os.path.exists(improved): os.mkdir(improved)
    deeper=bigD["d_top"]+bigD["d_in"]+"improved\\deeper\\"
    if not os.path.exists(deeper): os.mkdir(deeper)
    #next need to create the 5 compression levels for each of the original bands
    previous_cell_size=arcpy.env.cellSize
    cases_done=0
    key_list2=bigD[("defect","image_bands")].keys()
    key_list=[]
    for each_key in key_list2: # odd MTL.TIF file present in at least one Landsat7 dataset
        if len(each_key) in [2,3]:
            key_list.append(each_key) #remove(each_key)
    key_list.sort()
    for each_key in key_list: #Con(IsNull("methodcomp"),Con("20040616b62">0,"20040616b62"),Con("methodcomp">0,Con("20040616b62">0,"20040616b62")))
        arcpy.env.cellSize=15
        raw_bands=bigD["d_top"]+bigD["d_in"]+bigD[("defect","image_bands")][each_key]
        original=deeper+bigD["d_in"][0:8]+each_key
        if not os.path.exists(original) or overwrite_data:
            print original
            #raster_algebra0=Con(Raster(methodcomp)>0,Con(Raster(raw_bands)>0,Raster(raw_bands)))
            #raster_algebra0=Con(IsNull(Raster(methodcomp)),Con(Raster(raw_bands)>0,Raster(raw_bands)))
            raster_algebra0=Con(Raster(raw_bands)>0,Raster(raw_bands))
            raster_algebra0.save(original)            
    for each_key in key_list:
        original=deeper+bigD["d_in"][0:8]+each_key
        for compress in (4,8,16,32,64):
            arcpy.env.cellSize=15
            nbr = NbrRectangle(compress,compress,"CELL") #CELL versus MAP units
            rast_calc_name=deeper+each_key+bigD["d_in"][2:8]+"mn"+str(compress) #creates floating point mean blocks
            calc_files[("mean",each_key,compress)]=rast_calc_name #store name for later reuse
            if not os.path.exists(rast_calc_name) or overwrite_data:
                print "now at line 222 doing ",rast_calc_name
                raster_algebra=BlockStatistics(original, nbr, "MEAN", "DATA") #need to remove 0s from these block stat calculations
                raster_algebra.save(rast_calc_name)
                cases_done+=1
            rast_calc_name2=deeper+each_key+bigD["d_in"][2:8]+"mi"+str(compress) #creates integer value blocks
            calc_files[("mean_int",each_key,compress)]=rast_calc_name2 #store name for later reuse
            if not os.path.exists(rast_calc_name2) or overwrite_data:
                arcpy.env.cellSize = compress*15
                raster_algebra=Int(rast_calc_name)
                raster_algebra.save(rast_calc_name2)
                cases_done+=1
        arcpy.env.cellSize=15
        f10=methodcomp
        f4=calc_files[("mean_int",each_key,4)]
        f5=calc_files[("mean_int",each_key,8)]
        f6=calc_files[("mean_int",each_key,16)]
        f7=calc_files[("mean_int",each_key,32)]
        f8=calc_files[("mean_int",each_key,64)]
        f9=improved+bigD["d_in"][0:8]+each_key+"gp" #gp has values in pixels that had been gaps, but not yet more fully
        print "now creating ",f9
        calc_files[("gap_repair",each_key)]=f9
        if not os.path.exists(f9) or overwrite_data: # or fixme:
##            raster_algebra = Con(Raster(f10)==4,Raster(f4),Con(Raster(f10)==8,Raster(f5),Con(Raster(f10)==16,Raster(f6),Con(Raster(f10)==32,\
##                            Raster(f7),Con(Raster(f10)==64,Raster(f8))))))
            raster_algebra = Con(Raster(f10)==4,Con(IsNull(Raster(f4)),Con(IsNull(Raster(f5)),Con(IsNull(Raster(f6)),Raster(f7),Raster(f6)),\
                            Raster(f5)),Raster(f4)),Con(Raster(f10)==8,Con(IsNull(Raster(f5)),Con(IsNull(Raster(f6)),Raster(f7),Raster(f6)),\
                            Raster(f5)),Con(Raster(f10)==16,Con(IsNull(Raster(f6)),Raster(f7),Raster(f6)),Con(Raster(f10)==32,\
                            Raster(f7),Con(Raster(f10)==64,Raster(f8))))))
            raster_algebra.save(f9) #for some reason differs from equivalent statement in ArcMap RasterCalc
        f0=improved+bigD["d_in"][0:8]+each_key+"fx" #fx has full coverage restored
        print "now pooling results into final raster:",f0
        if not os.path.exists(f0) or overwrite_data: #force a rewrite
            raster_algebra2=Con(IsNull(Raster(original)),Raster(f9),Con(Raster(original)==0,Raster(f9),Raster(original)))
            raster_algebra2.save(f0)

    #then need to use the rules in methodcomp and methodnum to create estimates for missing values
    print str(cases_done)
    bigD[("real_repair")]=calc_files
    #haltnow=1/0
    arcpy.env.cellSize=previous_cell_size #restore original environment cellSize
    final_report(bigD,methodcomp,methodnum) # "gp" repairs for each band as bigD[("real_repair")][calc_files[("gap_repair",each_key)]] key_list=bigD[("defect","image_bands")].keys()
    return #end of 'repair_targets'

def final_report(bigD,methodcomp,methodnum): #in charge of summarizing repairs of actual Landsat7 SLC failure data
    overwrite_data=bigD["overwrite_data"]
    print "now running 'final_report' at line 2293"
    improved=bigD["d_top"]+bigD["d_in"]+"improved\\"
    currVATdict_in={}
    compress={1:4,2:8,3:16,4:32,5:64}
    key_list2=bigD[("defect","image_bands")].keys()
    key_list=[]
    for each_key in key_list2: # odd MTL.TIF file present in at least one Landsat7 dataset
        if len(each_key) in [2,3]:
            key_list.append(each_key) #remove(each_key)
    key_list.sort()
    calc_files=bigD[("real_repair")]
    band_num=0
    gp={}
    for each_key in key_list:
        band_num+=1
        gp[band_num]=calc_files[("gap_repair",each_key)] #dictionary with names of gap_fill rasters for each band
    success,currVATdict_case=VAT_reader(currVATdict_in,methodcomp,1,"csv",1) # only passing two parameters, using default for remainder
    if success:
        these_keys=currVATdict_case.keys()
        these_keys.sort()
        local_count=0
        innerVATdict={}
        for a_key in these_keys:
            local_count+=1
            a_value=compress[local_count]
            if a_value!=a_key:
                print str(a_value),str(a_key),"mismatch at line 2320"
                print currVATdict_case
                halt_now=1/0
            else:
                innerVATdict[a_value]=currVATdict_case[a_key]
        print str(local_count),"rows present in VAT of ",methodcomp
        cases_mcomp=0
        for new_key in innerVATdict.keys():
            cases_mcomp+=innerVATdict[new_key]
        print "total pixel count = ",str(cases_mcomp)    
    else:
        print "trouble at line 2331 reading file: ",methodcomp
    success,currVATdict2_case=VAT_reader(currVATdict_in,methodnum,1,"csv",1) # only passing two parameters, using default for remainder
    if success:
        these_keys2=currVATdict2_case.keys()
        these_keys2.sort()
        local_count2=0
        innerVATdict2={}
        for a_key in these_keys2:
            local_count2+=1
            a_value=local_count2
            #####if a_value!=a_key and a_key!=99:
            if a_value not in these_keys2 and a_value!=51 and (a_value>51 or a_value<0): #####
                print str(a_value),str(a_key),"mismatch at line 2343"
                print currVATdict2_case
                halt_now=1/0
            else:
                innerVATdict2[a_value]=currVATdict2_case[a_key] #remaps case 99 down to 51 automatically
        print str(local_count2),"rows present in VAT of ",methodnum
        cases_mnum=0
        for new_key in innerVATdict2.keys():
            cases_mnum+=innerVATdict2[new_key]
        print "total pixel count = ",str(cases_mnum)    
    else:
        print "trouble at line 2354 reading file: ",methodnum
    #combine methodcomp and methodnum to allow measurement of differential use of compression levels over cycle numbers
    pooled_info=improved+"methcompxnum"
    if not os.path.exists(pooled_info) or overwrite_data:
        raster_algebra=Raster(methodnum) + 100 * Raster(methodcomp)
        raster_algebra.save(pooled_info)
    success,currVATdict3_case=VAT_reader(currVATdict_in,pooled_info,1,"csv",1) # only passing two parameters, using default for remainder
    if success:
        these_keys=currVATdict3_case.keys()
        these_keys.sort()
        local_count=0
        #innerVATdict3={}
        full_dict={}
        for i in range(1,52):
            for j in (4,8,16,32,64):
                full_dict[(i,j)]=0
        resorted_dict={}
        for a_key in these_keys:
            local_count+=1
            a_value=a_key
            comp_num=Int(a_key/100.0)
            cycle=a_key-100*comp_num
            full_dict[(cycle,comp_num)]=currVATdict3_case[a_key]
            resorted_dict[local_count]=(a_key,cycle,comp_num)
            #innerVATdict3[a_value]=currVATdict_case[a_key]
            
        print str(local_count),"rows present in VAT of ",pooled_info
        cases_pooled=0
        for new_key in currVATdict3_case.keys():
            cases_pooled+=currVATdict3_case[new_key]
        print "total pixel count = ",str(cases_pooled)    
    else:
        print "trouble at line 2376 reading file: ",pooled_info
    final_output=improved+"methcompxnum_defect"+bigD["d_in"][:8]+"_source"+bigD["s_short"]+".csv" #need more ID entry in the name
    fileout=open(final_output,"w")
    header_txt="cycle,compress4X,compress8X,compress16X,compress32X,compress64X,totals\n"
    fileout.write(header_txt)
    for i in range(1,52):
        cycle_sum=full_dict[(i,4)]+full_dict[(i,8)]+full_dict[(i,16)]+full_dict[(i,32)]+full_dict[(i,64)]
        stuff2write=str(i)+","+str(full_dict[(i,4)])+","+str(full_dict[(i,8)])+","+str(full_dict[(i,16)])+","+str(full_dict[(i,32)])+\
                     ","+str(full_dict[(i,64)])+","+str(cycle_sum)+"\n"
        fileout.write(stuff2write)
    stuff2write="source_file without gaps = ,"+bigD["s_top"]+","+bigD["s_in"]+"\n"
    fileout.write(stuff2write)
    stuff2write="target_file with defects = ,"+bigD["d_top"]+","+bigD["d_in"]+"\n"
    fileout.write(stuff2write)
    fileout.close()
    print resorted_dict
    return #end of 'final_report'

### Main program of sequential calls to working functions
# bigD is the shared state dictionary handed to, and returned by, the working functions.
# bigD["jump2real"] is indexed as a pair of flags:
#   [0]==True  -> skip the full analysis and only try the gap repair from earlier results
#   [0]==False -> run the full analysis loop; if [1]==True the gap repair is tried afterwards
if bigD["jump2real"][0]==True: #try to use previous results
    #try to create actual missing data in either case
    status_here,methodcomp,methodnum=canskip2real(bigD)
    if status_here == True: #both methodcomp and methodnum rasters were found
        repair_targets(bigD,methodcomp,methodnum)
        
if bigD["jump2real"][0]==False: #run all the previously built activities first
    watching=bigD["where2start"]
    simple=0 #pass counter used only for the emergency stop at the bottom of the loop
    # bigD["where2start"] is not assigned in this loop body itself; presumably one of the
    # called functions advances it - verify against next_stats / validate
    while bigD["where2start"]<51: #<50 is normal termination for full run
        simple+=1
        watching=bigD["where2start"] #value at the start of this pass, compared again after the calcs
        print "\nnow in 'main'",str(watching)
        first_things_first(sys.argv) #line 865
        use_walk(bigD["d_top"]+bigD["d_in"],True) #line 470 #True prints out full defect directory information, False prints none
        use_walk(bigD["s_top"]+bigD["s_in"],False) #line 470 #True prints out full clean source directory information, False prints none

        ##defect_band_list,defect_upper_files,defect_lower_files = get_targets(defect_topmost,defect_inner,"defect",source_short)
        bigD=get_targets(bigD,"defect") #line 142
        show_connections(bigD[("defect","lower_files")]) #line 461 #(defect_lower_files)
        show_connections(bigD[("defect","upper_files")]) #line 461 #(defect_upper_files)

        # the -1 returned after new rasters were built cancels this pass's increment of 'simple',
        # so only passes that found everything already in place count toward the limit of 53
        simple+=need2create1st(bigD) # return 0 if top_level pc and compressions exist, -1 if they had to be newly created
        print str(simple)
        #haltnow=1/0
        ##source_band_list,source_upper_files,source_lower_files = get_targets(source_topmost,source_inner,"source",defect_short)
        bigD=get_targets(bigD,"source")
        show_connections(bigD[("source","lower_files")]) #(source_lower_files)
        show_connections(bigD[("source","upper_files")]) #(source_upper_files)

        #se_by_pc_compress_dict,summary_dict,accume_dict,accume2_dict,accume3_dict=make_se_tables(source_topmost,source_inner,source_upper_files)
        bigD=make_se_tables(bigD)
        #full_cutoff_list=cutoff_list(accume3_dict,summary_dict)
        bigD=cutoff_list(bigD)

        #status_start,subdir_good,status_current,where2start,final_dir_finished,this_dir=validate(source_topmost,source_inner,source_lower_files,source_upper_files,defect_short)
        ##bigD=validate(bigD)
        ##print "line 788, this_dir=",bigD["this_dir"]

        #ready2go=next_stats(status_start,subdir_good,status_current,where2start,final_dir_finished,source_topmost,source_inner,source_upper_files,\
        #                    this_dir,defect_topmost,defect_inner,defect_short,defect_band_list,full_cutoff_list)

        bigD=validate(bigD)
        print "line 1267, this_dir=",bigD["this_dir"]
        bigD=next_stats(bigD)
        print "where2start = ",str(bigD["where2start"])
        if bigD["where2start"]<51: bigD=normalize(bigD) #had set this to 52 at one point - normally can't do round 51 normalize till after wrap_up has created the special files in 'zz'
        print "in main at line 1325",str(bigD["where2start"]),"with 'current_fix' = ",str(bigD["current_fix"])
        # no progress during this pass is detected here but currently ignored (the halt is commented out)
        if watching==bigD["where2start"] and watching>=3: pass #haltnow=1/0
        print str(watching)
        print str(bigD["where2start"])
        if simple>=53: haltnow=1/0 #emergency break/brake
    # the loop has ended (where2start is 51 or more): finish up and export the results
    bigD=wrap_up(bigD)
    do_pc2and3(bigD)
    bigD=normalize(bigD)#either normalize or alt_normal could be ahead of or behind each other
    bigD=alt_normal(bigD)#handles the alternate version for PC1 and only versions of PC2 and PC3 normalized stats
    export2excel(bigD)
    alt_export2excel(bigD)
    ##do_pc2and3(bidD)
    ###already_done=calculate_principal_comps(source_topmost,source_inner,source_band_list,source_upper_file_list) #boolean value for old vs new P

    #create_mapping(source_topmost,source_inner,source_band_list,source_upper_file_list)
    
#try to create actual missing data in either case
# runs after the full analysis above when the second jump2real flag asks for the repair as well
if bigD["jump2real"][0] == False and bigD["jump2real"][1] == True:
    status_here,methodcomp,methodnum=canskip2real(bigD)
    if status_here == True:
        repair_targets(bigD,methodcomp,methodnum)
    
    
